From 3284d6129004460107eb94182b9efc32d7793e8e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 9 Oct 2024 17:10:29 +0200 Subject: [PATCH 001/101] add ann tier --- Snakefile | 1 + rules/ann.smk | 50 ++++++++++++++++ rules/evt.smk | 72 ++++++++++++++++++++++- scripts/build_ann.py | 124 +++++++++++++++++++++++++++++++++++++++ scripts/build_evt.py | 18 ++++-- scripts/util/patterns.py | 26 ++++++++ scripts/util/utils.py | 12 ++++ templates/config.json | 2 + 8 files changed, 298 insertions(+), 7 deletions(-) create mode 100644 rules/ann.smk create mode 100644 scripts/build_ann.py diff --git a/Snakefile b/Snakefile index 4738359..5069de0 100644 --- a/Snakefile +++ b/Snakefile @@ -59,6 +59,7 @@ include: "rules/psp.smk" include: "rules/hit.smk" include: "rules/pht.smk" include: "rules/pht_fast.smk" +include: "rules/ann.smk" include: "rules/evt.smk" include: "rules/skm.smk" include: "rules/blinding_calibration.smk" diff --git a/rules/ann.smk b/rules/ann.smk new file mode 100644 index 0000000..f7e6b1c --- /dev/null +++ b/rules/ann.smk @@ -0,0 +1,50 @@ +""" +Snakemake rules for processing ann tier. This is done only for the coax detectors +to apply the ann and risetime cuts for psd. + +""" + +from scripts.util.pars_loading import pars_catalog +from scripts.util.utils import par_dsp_path +from scripts.util.patterns import ( + get_pattern_tier_dsp, + get_pattern_tier_psp, + get_pattern_tier_ann, + get_pattern_tier, + get_pattern_log, + get_pattern_pars, + get_pattern_pars_overwrite, +) + +for tier in ["ann", "pan"]: + + rule: + input: + dsp_file=get_pattern_tier_dsp(setup) if tier == "ann" else get_pattern_tier_psp(setup), + pars_file=lambda wildcards: get_svm_file(wildcards, "ann", "cuts"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + output: + tier_file=get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(setup, f"{tier}_db"), + log: + get_pattern_log(setup, f"tier_{tier}"), + group: + "tier-ann" + resources: + runtime=300, + mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/build_ann.py')} " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--input {input.dsp_file} " + "--output {output.tier_file} " + "--db_file {output.db_file} " + "--pars_file {input.pars_file} " + + set_last_rule_name(workflow, f"build_{tier}") \ No newline at end of file diff --git a/rules/evt.smk b/rules/evt.smk index ed20d2d..1026d9b 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -9,6 +9,8 @@ from scripts.util.patterns import ( get_pattern_tier_tcm, get_pattern_tier_pht, get_pattern_tier_psp, + get_pattern_tier_pan, + get_pattern_tier_ann, get_pattern_tier, get_pattern_log, get_pattern_pars, @@ -31,6 +33,18 @@ for tier in ("evt", "pet"): else get_pattern_tier_pht(setup) ), tcm_file=get_pattern_tier_tcm(setup), + ann_file=lambda wildcards: ( + get_pattern_tier_ann(setup) + if tier == "evt" + else get_pattern_tier_pan(setup) + ), + # needs snakemake >= 8.3 + # ann_file= branch( + # lambda wildcards: tier if int(wildcards["period"][1:]) <= 11 else False, + # cases = {"evt":get_pattern_tier_ann(setup), + # "pet":get_pattern_tier_pan(setup), + # } + # ), xtalk_matrix=lambda wildcards: get_svm_file( tier=tier, wildcards=wildcards, name="xtc" ), @@ -63,10 +77,66 @@ for tier in ("evt", "pet"): "--par_files {input.par_files} " "--hit_file {input.hit_file} " "--tcm_file {input.tcm_file} " + "--ann_file {input.ann_file} 
" "--dsp_file {input.dsp_file} " "--output {output.evt_file} " - set_last_rule_name(workflow, f"build_{tier}") + set_last_rule_name(workflow, f"build_{tier}_with_ann") + # ann_rule = list(workflow.rules)[-1] + + # rule: + # input: + # dsp_file=( + # get_pattern_tier_dsp(setup) + # if tier == "evt" + # else get_pattern_tier_psp(setup) + # ), + # hit_file=( + # get_pattern_tier_hit(setup) + # if tier == "evt" + # else get_pattern_tier_pht(setup) + # ), + # tcm_file=get_pattern_tier_tcm(setup), + # xtalk_matrix=lambda wildcards: get_svm_file( + # tier=tier, wildcards=wildcards, name="xtc" + # ), + # par_files=lambda wildcards: pars_catalog.get_par_file( + # setup, wildcards.timestamp, "pht" + # ), + # output: + # evt_file=get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), + # params: + # timestamp="{timestamp}", + # datatype="{datatype}", + # tier=tier, + # log: + # get_pattern_log(setup, f"tier_{tier}"), + # group: + # "tier-evt" + # resources: + # runtime=300, + # mem_swap=50, + # shell: + # "{swenv} python3 -B " + # f"{workflow.source_path('../scripts/build_evt.py')} " + # "--configs {configs} " + # "--metadata {meta} " + # "--log {log} " + # "--tier {params.tier} " + # "--datatype {params.datatype} " + # "--timestamp {params.timestamp} " + # "--xtc_file {input.xtalk_matrix} " + # "--par_files {input.par_files} " + # "--hit_file {input.hit_file} " + # "--tcm_file {input.tcm_file} " + # "--dsp_file {input.dsp_file} " + # "--output {output.evt_file} " + + # set_last_rule_name(workflow, f"build_{tier}") + # no_ann_rule = list(workflow.rules)[-1] + + # rule_order_list = [ann_rule, no_ann_rule] + # workflow._ruleorder.add(*rule_order_list) rule: wildcard_constraints: diff --git a/scripts/build_ann.py b/scripts/build_ann.py new file mode 100644 index 0000000..1f0f67f --- /dev/null +++ b/scripts/build_ann.py @@ -0,0 +1,124 @@ +import argparse +import json +import logging +import os +import pathlib +import re +import time +import warnings + +os.environ["LGDO_CACHE"] = "false" +os.environ["LGDO_BOUNDSCHECK"] = "false" +os.environ["DSPEED_CACHE"] = "false" +os.environ["DSPEED_BOUNDSCHECK"] = "false" + +import lgdo.lh5 as lh5 +import numpy as np +from dspeed import build_dsp +from legendmeta import LegendMetadata +from legendmeta.catalog import Props + + +def replace_list_with_array(dic): + for key, value in dic.items(): + if isinstance(value, dict): + dic[key] = replace_list_with_array(value) + elif isinstance(value, list): + dic[key] = np.array(value, dtype="float32") + else: + pass + return dic + + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + +argparser = argparse.ArgumentParser() +argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) +argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) +argparser.add_argument("--log", help="log file", type=str) +argparser.add_argument("--input", help="input file", type=str) +argparser.add_argument("--output", help="output file", type=str) +argparser.add_argument("--db_file", help="db file", type=str) +args = argparser.parse_args() + +pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) +logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +logging.getLogger("numba").setLevel(logging.INFO) +logging.getLogger("parse").setLevel(logging.INFO) 
+logging.getLogger("lgdo").setLevel(logging.INFO) +log = logging.getLogger(__name__) + +configs = LegendMetadata(path=args.configs) +channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_ann"][ + "inputs" +]["processing_chain"] + +channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} +db_files = [ + par_file + for par_file in args.pars_file + if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yml" +] + +database_dic = Props.read_from(db_files, subst_pathvar=True) +database_dic = replace_list_with_array(database_dic) + +pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) + +rng = np.random.default_rng() +rand_num = f"{rng.integers(0,99999):05d}" +temp_output = f"{args.output}.{rand_num}" + +start = time.time() + +build_dsp( + args.input, + temp_output, + {}, + database=database_dic, + chan_config=channel_dict, + write_mode="r", + buffer_len=3200 if args.datatype == "cal" else 3200, + block_width=16, +) + +log.info(f"build_ann finished in {time.time()-start}") + +os.rename(temp_output, args.output) + +if "ann" in args.output: + key = os.path.basename(args.output).replace("-tier_ann.lh5", "") +else: + key = os.path.basename(args.output).replace("-tier_pan.lh5", "") + +raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] + +raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] + +outputs = {} +channels = [] +for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + +full_dict = { + "valid_fields": { + "ann": outputs, + }, + "valid_keys": {key: {"valid_channels": {"ann": channels}}}, +} +pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) +with open(args.db_file, "w") as w: + json.dump(full_dict, w, indent=4) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 1fcd347..5a808b2 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -35,6 +35,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): argparser.add_argument("--hit_file", help="hit file", type=str) argparser.add_argument("--dsp_file", help="dsp file", type=str) argparser.add_argument("--tcm_file", help="tcm file", type=str) +argparser.add_argument("--ann_file", help="ann file") argparser.add_argument("--xtc_file", help="xtc file", type=str) argparser.add_argument("--par_files", help="par files", nargs="*") @@ -125,13 +126,18 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.output}.{rand_num}" +file_table = { + "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), + "dsp": (args.dsp_file, "dsp", "ch{}"), + "hit": (args.hit_file, "hit", "ch{}"), + "evt": (None, "evt"), +} + +if args.ann_file is not None: + file_table["ann"] = (args.ann_file, "dsp", "ch{}") + table = build_evt( - { - "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), - "dsp": (args.dsp_file, "dsp", "ch{}"), - "hit": (args.hit_file, "hit", "ch{}"), - "evt": (None, "evt"), - }, + file_table, evt_config, ) diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 79bcaac..2629e7e 
100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -16,10 +16,12 @@ pars_path, plts_path, sandbox_path, + tier_ann_path, tier_daq_path, tier_dsp_path, tier_evt_path, tier_hit_path, + tier_pan_path, tier_path, tier_pet_path, tier_pht_path, @@ -137,6 +139,16 @@ def get_pattern_tier_hit(setup): ) +def get_pattern_tier_ann(setup): + return os.path.join( + f"{tier_ann_path(setup)}", + "{datatype}", + "{period}", + "{run}", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_ann.lh5", + ) + + def get_pattern_tier_evt(setup): return os.path.join( f"{tier_evt_path(setup)}", @@ -175,6 +187,16 @@ def get_pattern_tier_pht(setup): ) +def get_pattern_tier_pan(setup): + return os.path.join( + f"{tier_pan_path(setup)}", + "{datatype}", + "{period}", + "{run}", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pan.lh5", + ) + + def get_pattern_tier_pet(setup): return os.path.join( f"{tier_pet_path(setup)}", @@ -212,6 +234,8 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): file_pattern = get_pattern_tier_dsp(setup) elif tier == "hit": file_pattern = get_pattern_tier_hit(setup) + elif tier == "ann": + file_pattern = get_pattern_tier_ann(setup) elif tier == "evt": file_pattern = get_pattern_tier_evt(setup) elif tier == "evt_concat": @@ -220,6 +244,8 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): file_pattern = get_pattern_tier_psp(setup) elif tier == "pht": file_pattern = get_pattern_tier_pht(setup) + elif tier == "pan": + file_pattern = get_pattern_tier_pan(setup) elif tier == "pet": file_pattern = get_pattern_tier_pet(setup) elif tier == "pet_concat": diff --git a/scripts/util/utils.py b/scripts/util/utils.py index f3f3ebc..5ec88b0 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -53,6 +53,10 @@ def tier_hit_path(setup): return setup["paths"]["tier_hit"] +def tier_ann_path(setup): + return setup["paths"]["tier_ann"] + + def tier_evt_path(setup): return setup["paths"]["tier_evt"] @@ -65,6 +69,10 @@ def tier_pht_path(setup): return setup["paths"]["tier_pht"] +def tier_pan_path(setup): + return setup["paths"]["tier_pan"] + + def tier_pet_path(setup): return setup["paths"]["tier_pet"] @@ -82,12 +90,16 @@ def get_tier_path(setup, tier): return tier_dsp_path(setup) elif tier == "hit": return tier_hit_path(setup) + elif tier == "ann": + return tier_ann_path(setup) elif tier == "evt": return tier_evt_path(setup) elif tier == "psp": return tier_psp_path(setup) elif tier == "pht": return tier_pht_path(setup) + elif tier == "pan": + return tier_pan_path(setup) elif tier == "pet": return tier_pet_path(setup) elif tier == "skm": diff --git a/templates/config.json b/templates/config.json index 7d17f71..a86db97 100644 --- a/templates/config.json +++ b/templates/config.json @@ -19,9 +19,11 @@ "tier_tcm": "$_/generated/tier/tcm", "tier_dsp": "$_/generated/tier/dsp", "tier_hit": "$_/generated/tier/hit", + "tier_ann": "$_/generated/tier/ann", "tier_evt": "$_/generated/tier/evt", "tier_psp": "$_/generated/tier/psp", "tier_pht": "$_/generated/tier/pht", + "tier_pan": "$_/generated/tier/pan", "tier_pet": "$_/generated/tier/pet", "tier_skm": "$_/generated/tier/skm", From 26d52f25c6565cb8cd3af147c0e13dfb61cf1877 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 20 Oct 2024 14:55:31 +0200 Subject: [PATCH 002/101] allow more jobs --- rules/ann.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/ann.smk b/rules/ann.smk index f7e6b1c..ff24820 100644 --- a/rules/ann.smk +++ b/rules/ann.smk @@ -34,7 +34,7 @@ for tier in ["ann", 
"pan"]: "tier-ann" resources: runtime=300, - mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, + mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: "{swenv} python3 -B " f"{workflow.source_path('../scripts/build_ann.py')} " From 7918e830a4ce913166787b89f0f526bea7051ea8 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 21 Oct 2024 23:10:29 +0200 Subject: [PATCH 003/101] pc cleanup --- rules/ann.smk | 10 +++++++--- scripts/build_ann.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/rules/ann.smk b/rules/ann.smk index ff24820..64cdd50 100644 --- a/rules/ann.smk +++ b/rules/ann.smk @@ -20,7 +20,11 @@ for tier in ["ann", "pan"]: rule: input: - dsp_file=get_pattern_tier_dsp(setup) if tier == "ann" else get_pattern_tier_psp(setup), + dsp_file=( + get_pattern_tier_dsp(setup) + if tier == "ann" + else get_pattern_tier_psp(setup) + ), pars_file=lambda wildcards: get_svm_file(wildcards, "ann", "cuts"), params: timestamp="{timestamp}", @@ -46,5 +50,5 @@ for tier in ["ann", "pan"]: "--output {output.tier_file} " "--db_file {output.db_file} " "--pars_file {input.pars_file} " - - set_last_rule_name(workflow, f"build_{tier}") \ No newline at end of file + + set_last_rule_name(workflow, f"build_{tier}") diff --git a/scripts/build_ann.py b/scripts/build_ann.py index 1f0f67f..224877a 100644 --- a/scripts/build_ann.py +++ b/scripts/build_ann.py @@ -90,7 +90,7 @@ def replace_list_with_array(dic): if "ann" in args.output: key = os.path.basename(args.output).replace("-tier_ann.lh5", "") -else: +else: key = os.path.basename(args.output).replace("-tier_pan.lh5", "") raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] From e9561bdf62f0dc542721643ad8376e105e8b34c5 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 21 Oct 2024 23:10:40 +0200 Subject: [PATCH 004/101] bump pkg versions --- templates/config.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/config.json b/templates/config.json index a86db97..9fd0d0f 100644 --- a/templates/config.json +++ b/templates/config.json @@ -55,9 +55,9 @@ "arg": "/data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif" }, "pkg_versions": { - "pygama": "pygama==2.0.1", + "pygama": "pygama==2.0.3", "pylegendmeta": "pylegendmeta==0.10.2", - "dspeed": "dspeed==1.4.0a1", + "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.7.0", "legend-daq2lh5": "legend-daq2lh5==1.2.1" } From a3c0dae6588ac4bbaeacabceb8602c3826ef55f2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 21 Oct 2024 23:18:39 +0200 Subject: [PATCH 005/101] add ml packages --- templates/config.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/templates/config.json b/templates/config.json index 9fd0d0f..0d1320d 100644 --- a/templates/config.json +++ b/templates/config.json @@ -59,7 +59,10 @@ "pylegendmeta": "pylegendmeta==0.10.2", "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.7.0", - "legend-daq2lh5": "legend-daq2lh5==1.2.1" + "legend-daq2lh5": "legend-daq2lh5==1.2.1", + "tensorflow": "tensorflow==2.17", + "keras": "keras==3.6.0", + "jax": "jax==0.4.30" } } } From 818511da149ae57f954a4a5fa9aaba075e1ddfa2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 15:15:38 +0100 Subject: [PATCH 006/101] refactor for new metadata, clean up patterns and some naming --- scripts/build_dsp.py | 4 +- scripts/create_chankeylist.py | 2 +- scripts/util/CalibCatalog.py | 128 ------ 
.../util/{dataset_cal.py => cal_grouping.py} | 13 +- scripts/util/catalog.py | 191 ++++++++ scripts/util/create_pars_keylist.py | 11 +- scripts/util/pars_loading.py | 8 +- scripts/util/patterns.py | 407 +++--------------- scripts/util/utils.py | 134 ++---- 9 files changed, 309 insertions(+), 589 deletions(-) delete mode 100644 scripts/util/CalibCatalog.py rename scripts/util/{dataset_cal.py => cal_grouping.py} (92%) create mode 100644 scripts/util/catalog.py diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 8dad8fa..cbd0794 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -51,9 +51,7 @@ def replace_list_with_array(dic): channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} db_files = [ - par_file - for par_file in args.pars_file - if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yaml" + par_file for par_file in args.pars_file if os.path.splitext(par_file)[1] in (".json", ".yaml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) diff --git a/scripts/create_chankeylist.py b/scripts/create_chankeylist.py index c4c6cb9..435f55c 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -20,7 +20,7 @@ chmap = channel_map.channelmaps.on(args.timestamp) channels = [ - f"ch{chmap[chan].daq.rawid:03}" + chan for chan in status_map if status_map[chan]["processable"] is True and chmap[chan].system == "geds" ] diff --git a/scripts/util/CalibCatalog.py b/scripts/util/CalibCatalog.py deleted file mode 100644 index b222c5d..0000000 --- a/scripts/util/CalibCatalog.py +++ /dev/null @@ -1,128 +0,0 @@ -# -# Copyright (C) 2015 Oliver Schulz -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -""" -This module stores the scripts for leading validity files based on timestamp and system -""" - -import bisect -import collections -import copy -import json -import types -from collections import namedtuple -from pathlib import Path - -from .utils import unix_time - - -class Props: - @staticmethod - def read_from(sources): - def read_impl(sources): - if isinstance(sources, (str, Path)): - file_name = sources - with open(file_name) as file: - return json.load(file) - elif isinstance(sources, list): - result = {} - for p in map(read_impl, sources): - Props.add_to(result, p) - return result - else: - msg = f"Can't run Props.read_from on sources-value of type {type(sources)}" - raise ValueError(msg) - - return read_impl(sources) - - @staticmethod - def add_to(props_a, props_b): - a = props_a - b = props_b - - for key in b: - if key in a: - if isinstance(a[key], dict) and isinstance(b[key], dict): - Props.add_to(a[key], b[key]) - elif a[key] != b[key]: - a[key] = copy.copy(b[key]) - else: - a[key] = copy.copy(b[key]) - - -class PropsStream: - @staticmethod - def get(value): - if isinstance(value, (str, Path)): - return PropsStream.read_from(value) - elif isinstance(value, (collections.abc.Sequence, types.GeneratorType)): - return value - else: - msg = f"Can't get PropsStream from value of type {type(value)}" - raise ValueError(msg) - - @staticmethod - def read_from(file_name): - with open(file_name) as file: - for json_str in file: - yield json.loads(json_str) - - -class CalibCatalog(namedtuple("CalibCatalog", ["entries"])): - __slots__ = () - - class Entry(namedtuple("Entry", ["valid_from", "file"])): - __slots__ = () - - @staticmethod - def read_from(file_name): - entries = {} - - for props in PropsStream.get(file_name): - timestamp = props["valid_from"] - system = "all" if props.get("category") is None else props["category"] - file_key = props["apply"] - if system not in entries: - entries[system] = [] - entries[system].append(CalibCatalog.Entry(unix_time(timestamp), file_key)) - - for system in entries: - entries[system] = sorted(entries[system], key=lambda entry: entry.valid_from) - return CalibCatalog(entries) - - def calib_for(self, timestamp, category="all", allow_none=False): - if category in self.entries: - valid_from = [entry.valid_from for entry in self.entries[category]] - pos = bisect.bisect_right(valid_from, unix_time(timestamp)) - if pos > 0: - return self.entries[category][pos - 1].file - else: - if allow_none: - return None - else: - msg = f"No valid calibration found for timestamp: {timestamp}, category: {category}" - raise RuntimeError(msg) - else: - if allow_none: - return None - else: - msg = f"No calibrations found for category: {category}" - raise RuntimeError(msg) - - @staticmethod - def get_calib_files(catalog_file, timestamp, category="all"): - catalog = CalibCatalog.read_from(catalog_file) - return CalibCatalog.calib_for(catalog, timestamp, category) diff --git a/scripts/util/dataset_cal.py b/scripts/util/cal_grouping.py similarity index 92% rename from scripts/util/dataset_cal.py rename to scripts/util/cal_grouping.py index 693e934..aec1572 100644 --- a/scripts/util/dataset_cal.py +++ b/scripts/util/cal_grouping.py @@ -14,12 +14,23 @@ from .utils import filelist_path -class dataset_file: +class cal_grouping: def __init__(self, setup, input_file): with open(input_file) as r: self.datasets = json.load(r) + self.expand_runs() self.setup = setup + def expand_runs(self): + for channel, chan_dict in self.datasets.items(): + for part, part_dict in 
chan_dict.items(): + for per, runs in part_dict.items(): + if isinstance(runs, str) and ".." in runs: + start, end = runs.split("..") + self.datasets[channel][part][per] = [ + f"r{x:02}" for x in range(int(start[2:]), int(end) + 1) + ] + def get_dataset(self, dataset, channel): partition_dict = self.datasets["default"].copy() if channel in self.datasets: diff --git a/scripts/util/catalog.py b/scripts/util/catalog.py new file mode 100644 index 0000000..1fb516b --- /dev/null +++ b/scripts/util/catalog.py @@ -0,0 +1,191 @@ +# +# Copyright (C) 2015 Oliver Schulz +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +This module stores the scripts for leading validity files based on timestamp and system +""" + +import bisect +import collections +import copy +import json +import types +from collections import namedtuple +from pathlib import Path + +import yaml + +from .utils import unix_time + + +class Props: + @staticmethod + def read_from(sources): + def read_impl(sources): + if isinstance(sources, (str, Path)): + file_name = sources + if isinstance(file_name, str): + file_name = Path(file_name) + if file_name.suffix in (".yaml", ".yml"): + with file_name.open() as file: + return yaml.safe_load(file) + elif file_name.suffix == ".json": + with open(file_name) as file: + return json.load(file) + else: + msg = f"Can't run Props.read_from on file with suffix {file_name.suffix}" + raise ValueError(msg) + elif isinstance(sources, list): + result = {} + for p in map(read_impl, sources): + Props.add_to(result, p) + return result + else: + msg = f"Can't run Props.read_from on sources-value of type {type(sources)}" + raise ValueError(msg) + + return read_impl(sources) + + @staticmethod + def add_to(props_a, props_b): + a = props_a + b = props_b + + for key in b: + if key in a: + if isinstance(a[key], dict) and isinstance(b[key], dict): + Props.add_to(a[key], b[key]) + elif a[key] != b[key]: + a[key] = copy.copy(b[key]) + else: + a[key] = copy.copy(b[key]) + + +class PropsStream: + """Simple class to control loading of validity.yaml files""" + + @staticmethod + def get(value): + if isinstance(value, str): + return PropsStream.read_from(value) + + if isinstance(value, (collections.abc.Sequence, types.GeneratorType)): + return value + + msg = f"Can't get PropsStream from value of type {type(value)}" + raise ValueError(msg) + + @staticmethod + def read_from(file_name): + with Path(file_name).open() as r: + file = yaml.safe_load(r) + file = sorted(file, key=lambda item: unix_time(item["valid_from"])) + yield from file + + +class Catalog(namedtuple("Catalog", ["entries"])): + """Implementation of the `YAML metadata validity specification `_.""" + + __slots__ = () + + class Entry(namedtuple("Entry", ["valid_from", "file"])): + __slots__ = () + + @staticmethod + def get(value): + if isinstance(value, Catalog): + return value + + if isinstance(value, str): + return Catalog.read_from(value) + + msg = f"Can't get Catalog from value of type {type(value)}" + raise ValueError(msg) + + @staticmethod + def 
read_from(file_name): + """Read from a valdiity YAML file and build a Catalog object""" + entries = {} + for props in PropsStream.get(file_name): + timestamp = props["valid_from"] + system = "all" if props.get("category") is None else props["category"] + file_key = props["apply"] + if system not in entries: + entries[system] = [] + mode = "append" if props.get("mode") is None else props["mode"] + mode = "reset" if len(entries[system]) == 0 else mode + if mode == "reset": + new = file_key + elif mode == "append": + new = entries[system][-1].file.copy() + file_key + elif mode == "remove": + new = entries[system][-1].file.copy() + for file in file_key: + new.remove(file) + elif mode == "replace": + new = entries[system][-1].file.copy() + if len(file_key) != 2: + msg = f"Invalid number of elements in replace mode: {len(file_key)}" + raise ValueError(msg) + new.remove(file_key[0]) + new += [file_key[1]] + + else: + msg = f"Unknown mode for {timestamp}" + raise ValueError(msg) + + if timestamp in [entry.valid_from for entry in entries[system]]: + msg = ( + f"Duplicate timestamp: {timestamp}, use reset mode instead with a single entry" + ) + raise ValueError(msg) + entries[system].append(Catalog.Entry(unix_time(timestamp), new)) + + for system in entries: + entries[system] = sorted(entries[system], key=lambda entry: entry.valid_from) + return Catalog(entries) + + def valid_for(self, timestamp, system="all", allow_none=False): + """Get the valid entries for a given timestamp and system""" + if system in self.entries: + valid_from = [entry.valid_from for entry in self.entries[system]] + pos = bisect.bisect_right(valid_from, unix_time(timestamp)) + if pos > 0: + return self.entries[system][pos - 1].file + + if system != "all": + return self.valid_for(timestamp, system="all", allow_none=allow_none) + + if allow_none: + return None + + msg = f"No valid entries found for timestamp: {timestamp}, system: {system}" + raise RuntimeError(msg) + + if system != "all": + return self.valid_for(timestamp, system="all", allow_none=allow_none) + + if allow_none: + return None + + msg = f"No entries found for system: {system}" + raise RuntimeError(msg) + + @staticmethod + def get_files(catalog_file, timestamp, category="all"): + """Helper function to get the files for a given timestamp and category""" + catalog = Catalog.read_from(catalog_file) + return Catalog.valid_for(catalog, timestamp, category) diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index 88720ae..2fc3525 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -6,20 +6,20 @@ import json import re import warnings -from typing import ClassVar import snakemake as smk +import yaml from .FileKey import FileKey, ProcessingFileKey from .patterns import par_validity_pattern class pars_key_resolve: - name_dict: ClassVar[dict] = {"cal": ["par_dsp", "par_hit"], "lar": ["par_dsp", "par_hit"]} def __init__(self, valid_from, category, apply): self.valid_from = valid_from self.category = category + self.mode = "reset" self.apply = apply def __str__(self): @@ -34,7 +34,7 @@ def from_filekey(cls, filekey, name_dict): filekey.timestamp, "all", filekey.get_path_from_filekey( - par_validity_pattern(), processing_step=name_dict, ext="json" + par_validity_pattern(), processing_step=name_dict, ext="yaml" ), ) @@ -44,6 +44,11 @@ def write_to_jsonl(file_names, path): for file_name in file_names: of.write(f"{file_name.get_json()}\n") + @staticmethod + def write_to_yaml(file_names, path): + with 
open(path, "w") as of: + yaml.dump([file_name.__dict__ for file_name in file_names], of, sort_keys=False) + @staticmethod def match_keys(key1, key2): if ( diff --git a/scripts/util/pars_loading.py b/scripts/util/pars_loading.py index 03f242e..7a9dd87 100644 --- a/scripts/util/pars_loading.py +++ b/scripts/util/pars_loading.py @@ -5,14 +5,14 @@ import os -from .CalibCatalog import CalibCatalog +from .catalog import Catalog from .FileKey import ProcessingFileKey # from .patterns import from .utils import get_pars_path, par_overwrite_path -class pars_catalog(CalibCatalog): +class pars_catalog(Catalog): @staticmethod def match_pars_files(filelist1, filelist2): for file2 in filelist2: @@ -29,9 +29,9 @@ def match_pars_files(filelist1, filelist2): @staticmethod def get_par_file(setup, timestamp, tier): - par_file = os.path.join(get_pars_path(setup, tier), "validity.jsonl") + par_file = os.path.join(get_pars_path(setup, tier), "validity.yaml") pars_files = pars_catalog.get_calib_files(par_file, timestamp) - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") + par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.yaml") pars_files_overwrite = pars_catalog.get_calib_files(par_overwrite_file, timestamp) if len(pars_files_overwrite) > 0: pars_files, pars_files_overwrite = pars_catalog.match_pars_files( diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index b60d73f..7f0b30c 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -5,29 +5,16 @@ import os from .utils import ( - par_dsp_path, - par_evt_path, - par_hit_path, + get_pars_path, + get_tier_path, par_overwrite_path, - par_pht_path, - par_psp_path, - par_raw_path, - par_tcm_path, pars_path, plts_path, sandbox_path, tier_daq_path, - tier_dsp_path, - tier_evt_path, - tier_hit_path, tier_path, - tier_pet_path, - tier_pht_path, - tier_psp_path, tier_raw_blind_path, - tier_raw_path, tier_skm_path, - tier_tcm_path, tmp_log_path, tmp_par_path, tmp_plts_path, @@ -87,16 +74,6 @@ def get_pattern_tier_daq(setup): ) -def get_pattern_tier_raw(setup): - return os.path.join( - f"{tier_raw_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_raw.lh5", - ) - - def get_pattern_tier_raw_blind(setup): return os.path.join( f"{tier_raw_blind_path(setup)}", @@ -107,303 +84,55 @@ def get_pattern_tier_raw_blind(setup): ) -def get_pattern_tier_tcm(setup): - return os.path.join( - f"{tier_tcm_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_tcm.lh5", - ) - - -def get_pattern_tier_dsp(setup): - return os.path.join( - f"{tier_dsp_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_dsp.lh5", - ) - - -def get_pattern_tier_hit(setup): - return os.path.join( - f"{tier_hit_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_hit.lh5", - ) - - -def get_pattern_tier_evt(setup): - return os.path.join( - f"{tier_evt_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_evt.lh5", - ) - - -def get_pattern_tier_evt_concat(setup): - return os.path.join( - f"{tier_evt_path(setup)}", - "{datatype}", - "{experiment}-{period}-{run}-{datatype}-tier_evt.lh5", - ) - - -def get_pattern_tier_psp(setup): - return os.path.join( - f"{tier_psp_path(setup)}", - "{datatype}", - "{period}", - "{run}", - 
"{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_psp.lh5", - ) - - -def get_pattern_tier_pht(setup): - return os.path.join( - f"{tier_pht_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pht.lh5", - ) - - -def get_pattern_tier_pet(setup): - return os.path.join( - f"{tier_pet_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pet.lh5", - ) - - -def get_pattern_tier_pet_concat(setup): - return os.path.join( - f"{tier_pet_path(setup)}", - "{datatype}", - "{experiment}-{period}-{run}-{datatype}-tier_pet.lh5", - ) - - -def get_pattern_tier_skm(setup): - return os.path.join( - f"{tier_skm_path(setup)}", - "phy", - "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", - ) - - def get_pattern_tier(setup, tier, check_in_cycle=True): - if tier == "daq": - file_pattern = get_pattern_tier_daq(setup) - elif tier == "raw": - file_pattern = get_pattern_tier_raw(setup) - elif tier == "tcm": - file_pattern = get_pattern_tier_tcm(setup) - elif tier == "dsp": - file_pattern = get_pattern_tier_dsp(setup) - elif tier == "hit": - file_pattern = get_pattern_tier_hit(setup) - elif tier == "evt": - file_pattern = get_pattern_tier_evt(setup) - elif tier == "evt_concat": - file_pattern = get_pattern_tier_evt_concat(setup) - elif tier == "psp": - file_pattern = get_pattern_tier_psp(setup) - elif tier == "pht": - file_pattern = get_pattern_tier_pht(setup) - elif tier == "pet": - file_pattern = get_pattern_tier_pet(setup) - elif tier == "pet_concat": - file_pattern = get_pattern_tier_pet_concat(setup) - elif tier == "skm": - file_pattern = get_pattern_tier_skm(setup) - else: - msg = "invalid tier" - raise Exception(msg) - if tier_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: - return "/tmp/" + os.path.basename(file_pattern) - else: - return file_pattern - - -def get_pattern_par_raw(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_raw_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_raw_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_raw_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_raw" + f".{extension}", - ) - - -def get_pattern_par_tcm(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_tcm_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_tcm_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_tcm_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_tcm" + f".{extension}", - ) - - -def get_pattern_par_dsp(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_dsp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_dsp_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_dsp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_dsp" + f".{extension}", - ) - - -def get_pattern_par_hit(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_hit_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_hit_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_hit_path(setup)}", - "cal", - 
"{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_hit" + f".{extension}", - ) - - -def get_pattern_par_evt(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_evt_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_evt" + f".{extension}", - ) - - -def get_pattern_par_psp(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_psp_path(setup)}", - "cal", + if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: + file_pattern = os.path.join( + get_tier_path(setup, tier), + "{datatype}", "{period}", "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_psp_" + f"{name}.{extension}", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5", ) - else: - return os.path.join( - f"{par_psp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_psp" + f".{extension}", + elif tier in ["evt_concat", "pet_concat"]: + file_pattern = os.path.join( + get_tier_path(setup, tier[:3]), + "{datatype}", + "{experiment}-{period}-{run}-{datatype}-tier_" + f"{tier[:3]}.lh5", ) - -def get_pattern_par_pht(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_pht_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pht_" + f"{name}.{extension}", + elif tier == "skm": + file_pattern = os.path.join( + f"{tier_skm_path(setup)}", + "phy", + "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", ) else: - return os.path.join( - f"{par_pht_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pht" + f".{extension}", - ) - - -def get_pattern_par_pet(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pet_" + f"{name}.{extension}", - ) + msg = "invalid tier" + raise Exception(msg) + if tier_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: + return "/tmp/" + os.path.basename(file_pattern) else: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pet" + f".{extension}", - ) + return file_pattern -def get_pattern_pars(setup, tier, name=None, extension="json", check_in_cycle=True): - if tier == "raw": - file_pattern = get_pattern_par_raw(setup, name, extension) - elif tier == "tcm": - file_pattern = get_pattern_par_tcm(setup, name, extension) - elif tier == "dsp": - file_pattern = get_pattern_par_dsp(setup, name, extension) - elif tier == "hit": - file_pattern = get_pattern_par_hit(setup, name, extension) - elif tier == "evt": - file_pattern = get_pattern_par_evt(setup, name, extension) - elif tier == "psp": - file_pattern = get_pattern_par_psp(setup, name, extension) - elif tier == "pht": - file_pattern = get_pattern_par_pht(setup, name, extension) - elif tier == "pet": - file_pattern = get_pattern_par_pet(setup, name, extension) +def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=True): + if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: + if name is not None: + return os.path.join( + 
get_pars_path(setup, tier), + "cal", + "{period}", + "{run}", + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}_{name}.{extension}", + ) + else: + file_pattern = os.path.join( + get_pars_path(setup, tier), + "cal", + "{period}", + "{run}", + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}", + ) else: msg = "invalid tier" raise Exception(msg) @@ -419,7 +148,7 @@ def get_pattern_pars(setup, tier, name=None, extension="json", check_in_cycle=Tr return file_pattern -def get_pattern_pars_svm(setup, tier, name=None, ext="json"): +def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): if name is not None: return os.path.join( f"{par_overwrite_path(setup)}", @@ -440,7 +169,7 @@ def get_pattern_pars_svm(setup, tier, name=None, ext="json"): ) -def get_pattern_pars_overwrite(setup, tier, name=None): +def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): if name is not None: return os.path.join( f"{par_overwrite_path(setup)}", @@ -449,10 +178,7 @@ def get_pattern_pars_overwrite(setup, tier, name=None): "{period}", "{run}", "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - + tier - + "_" - + name - + "-overwrite.json", + + f"{tier}_{name}-overwrite.{extension}", ) else: return os.path.join( @@ -461,32 +187,34 @@ def get_pattern_pars_overwrite(setup, tier, name=None): "{datatype}", "{period}", "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + tier + "-overwrite.json", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + + tier + + f"-overwrite.{extension}", ) -def get_pattern_pars_tmp(setup, tier, name=None, datatype=None): +def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml"): if datatype is None: datatype = "{datatype}" if name is None: return os.path.join( f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + tier + ".json", + "{experiment}-{period}-{run}-" + + datatype + + "-{timestamp}-par_" + + f"{tier}.{extension}", ) else: return os.path.join( f"{tmp_par_path(setup)}", "{experiment}-{period}-{run}-" + datatype - + "-{timestamp}-par_" - + tier - + "_" - + name - + ".json", + + "-{timestamp}" + + f"par_{tier}_{name}.{extension}", ) -def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="json"): +def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: return os.path.join( f"{tmp_par_path(setup)}", @@ -509,11 +237,7 @@ def get_pattern_plts_tmp_channel(setup, tier, name=None): else: return os.path.join( f"{tmp_plts_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" - + tier - + "_" - + name - + ".pkl", + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl", ) @@ -538,19 +262,6 @@ def get_pattern_plts(setup, tier, name=None): ) -def get_energy_grids_pattern_combine(setup): - return os.path.join( - f"{tmp_par_path(setup)}", - "dsp", - "cal", - "{{period}}", - "{{run}}", - "par_dsp_energy_grid", - "{{channel}}", - "{{experiment}}-{{period}}-{{run}}-cal-{{timestamp}}-{{channel}}-{peak}-par_dsp_energy_grid.pkl", - ) - - def get_pattern_log(setup, processing_step): return os.path.join( f"{tmp_log_path(setup)}", @@ -559,17 +270,17 @@ def get_pattern_log(setup, processing_step): ) -def get_pattern_log_concat(setup, processing_step): +def get_pattern_log_channel(setup, processing_step): return os.path.join( f"{tmp_log_path(setup)}", processing_step, - "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", + 
"{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log", ) -def get_pattern_log_channel(setup, processing_step): +def get_pattern_log_concat(setup, processing_step): return os.path.join( f"{tmp_log_path(setup)}", processing_step, - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log", + "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", ) diff --git a/scripts/util/utils.py b/scripts/util/utils.py index 894d69e..2cb53ef 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -40,135 +40,51 @@ def tier_path(setup): return setup["paths"]["tier"] -def tier_tcm_path(setup): - return setup["paths"]["tier_tcm"] - - -def tier_raw_path(setup): - return setup["paths"]["tier_raw"] - - -def tier_dsp_path(setup): - return setup["paths"]["tier_dsp"] - - -def tier_hit_path(setup): - return setup["paths"]["tier_hit"] - - -def tier_evt_path(setup): - return setup["paths"]["tier_evt"] - - -def tier_psp_path(setup): - return setup["paths"]["tier_psp"] - - -def tier_pht_path(setup): - return setup["paths"]["tier_pht"] - - -def tier_pet_path(setup): - return setup["paths"]["tier_pet"] - - -def tier_skm_path(setup): - return setup["paths"]["tier_skm"] - - def get_tier_path(setup, tier): if tier == "raw": - return tier_raw_path(setup) + return setup["paths"]["tier_raw"] elif tier == "tcm": - return tier_tcm_path(setup) + return setup["paths"]["tier_tcm"] elif tier == "dsp": - return tier_dsp_path(setup) + return setup["paths"]["tier_dsp"] elif tier == "hit": - return tier_hit_path(setup) + return setup["paths"]["tier_hit"] elif tier == "evt": - return tier_evt_path(setup) + return setup["paths"]["tier_evt"] elif tier == "psp": - return tier_psp_path(setup) + return setup["paths"]["tier_psp"] elif tier == "pht": - return tier_pht_path(setup) + return setup["paths"]["tier_pht"] elif tier == "pet": - return tier_pet_path(setup) + return setup["paths"]["tier_pet"] elif tier == "skm": - return tier_skm_path(setup) + return setup["paths"]["tier_skm"] else: msg = f"no tier matching:{tier}" raise ValueError(msg) -def config_path(setup): - return setup["paths"]["config"] - - -def chan_map_path(setup): - return setup["paths"]["chan_map"] - - -def metadata_path(setup): - return setup["paths"]["metadata"] - - -def detector_db_path(setup): - return setup["paths"]["detector_db"] - - -def par_raw_path(setup): - return setup["paths"]["par_raw"] - - -def par_tcm_path(setup): - return setup["paths"]["par_tcm"] - - -def par_dsp_path(setup): - return setup["paths"]["par_dsp"] - - -def par_hit_path(setup): - return setup["paths"]["par_hit"] - - -def par_evt_path(setup): - return setup["paths"]["par_evt"] - - -def par_psp_path(setup): - return setup["paths"]["par_psp"] - - -def par_pht_path(setup): - return setup["paths"]["par_pht"] - - -def par_pet_path(setup): - return setup["paths"]["par_pet"] - - def pars_path(setup): return setup["paths"]["par"] def get_pars_path(setup, tier): if tier == "raw": - return par_raw_path(setup) + return setup["paths"]["par_raw"] elif tier == "tcm": - return par_tcm_path(setup) + return setup["paths"]["par_tcm"] elif tier == "dsp": - return par_dsp_path(setup) + return setup["paths"]["par_dsp"] elif tier == "hit": - return par_hit_path(setup) + return setup["paths"]["par_hit"] elif tier == "evt": - return par_evt_path(setup) + return setup["paths"]["par_evt"] elif tier == "psp": - return par_psp_path(setup) + return setup["paths"]["par_psp"] elif tier == "pht": - return par_pht_path(setup) + return 
setup["paths"]["par_pht"] elif tier == "pet": - return par_pet_path(setup) + return setup["paths"]["par_pet"] else: msg = f"no tier matching:{tier}" raise ValueError(msg) @@ -190,6 +106,22 @@ def par_overwrite_path(setup): return setup["paths"]["par_overwrite"] +def config_path(setup): + return setup["paths"]["config"] + + +def chan_map_path(setup): + return setup["paths"]["chan_map"] + + +def metadata_path(setup): + return setup["paths"]["metadata"] + + +def detector_db_path(setup): + return setup["paths"]["detector_db"] + + def log_path(setup): return setup["paths"]["log"] From 41c326bca6b596a78c9da886ad76a123c3d1e507 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 15:22:10 +0100 Subject: [PATCH 007/101] update rules for pattern changes --- Snakefile | 2 +- rules/blinding_calibration.smk | 2 +- rules/blinding_check.smk | 2 +- rules/common.smk | 4 ++-- rules/dsp.smk | 5 +---- rules/evt.smk | 10 +++++----- rules/hit.smk | 5 ++--- rules/pht.smk | 1 - rules/pht_fast.smk | 1 - rules/psp.smk | 2 +- rules/raw.smk | 4 +++- rules/tcm.smk | 3 +-- 12 files changed, 18 insertions(+), 23 deletions(-) diff --git a/Snakefile b/Snakefile index 017f0b1..b2daaa2 100644 --- a/Snakefile +++ b/Snakefile @@ -44,7 +44,7 @@ configs = config_path(setup) chan_maps = chan_map_path(setup) meta = metadata_path(setup) swenv = runcmd(setup) -part = ds.dataset_file(setup, os.path.join(configs, "partitions.json")) +part = ds.cal_grouping(setup, os.path.join(configs, "partitions.json")) basedir = workflow.basedir diff --git a/rules/blinding_calibration.smk b/rules/blinding_calibration.smk index ef0a11e..bcf0d64 100644 --- a/rules/blinding_calibration.smk +++ b/rules/blinding_calibration.smk @@ -5,7 +5,7 @@ Snakemake rules for calibrating daq energy for blinding. Two steps: """ from scripts.util.patterns import ( - get_pattern_par_raw, + get_pattern_pars, get_pattern_plts, get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, diff --git a/rules/blinding_check.smk b/rules/blinding_check.smk index 653eb3f..ac7240c 100644 --- a/rules/blinding_check.smk +++ b/rules/blinding_check.smk @@ -8,7 +8,7 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_raw, + get_pattern_pars, get_pattern_plts, get_pattern_pars, ) diff --git a/rules/common.smk b/rules/common.smk index c74f514..b985044 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -8,7 +8,7 @@ from scripts.util.patterns import ( par_raw_path, get_pattern_unsorted_data, get_pattern_tier_daq, - get_pattern_tier_raw, + get_pattern_tier, get_pattern_plts_tmp_channel, ) from scripts.util import ProcessingFileKey @@ -114,4 +114,4 @@ def get_tier_pattern(tier): elif tier == "raw": return get_pattern_tier_daq(setup) else: - return get_pattern_tier_raw(setup) + return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/dsp.smk b/rules/dsp.smk index 661a990..f8ea4a3 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -13,10 +13,7 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_dsp, get_pattern_plts, - get_pattern_tier_raw, - get_pattern_tier_tcm, get_pattern_tier, get_pattern_pars_tmp, get_pattern_log, @@ -386,7 +383,7 @@ rule build_pars_dsp: rule build_dsp: input: - raw_file=get_pattern_tier_raw(setup), + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), pars_file=ancient( lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, 
"dsp" diff --git a/rules/evt.smk b/rules/evt.smk index d51ad39..c760b54 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -21,16 +21,16 @@ for tier in ("evt", "pet"): rule: input: dsp_file=( - get_pattern_tier_dsp(setup) + get_pattern_tier(setup, "dsp", check_in_cycle=False) if tier == "evt" - else get_pattern_tier_psp(setup) + else get_pattern_tier(setup, "psp", check_in_cycle=False) ), hit_file=( - get_pattern_tier_hit(setup) + get_pattern_tier(setup, "hit", check_in_cycle=False) if tier == "evt" - else get_pattern_tier_pht(setup) + else get_pattern_tier(setup, "pht", check_in_cycle=False) ), - tcm_file=get_pattern_tier_tcm(setup), + tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), xtalk_matrix=lambda wildcards: get_svm_file( tier=tier, wildcards=wildcards, name="xtc" ), diff --git a/rules/hit.smk b/rules/hit.smk index fac37a1..f1bb0ba 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -11,9 +11,8 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_hit, + get_pattern_pars, get_pattern_plts, - get_pattern_tier_dsp, get_pattern_tier, get_pattern_pars_tmp, get_pattern_log, @@ -297,7 +296,7 @@ rule build_pars_hit: rule build_hit: input: - dsp_file=get_pattern_tier_dsp(setup), + dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), pars_file=lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, "hit" ), diff --git a/rules/pht.smk b/rules/pht.smk index 86646fa..76542a3 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -13,7 +13,6 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index 925d42c..5672011 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -5,7 +5,6 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, diff --git a/rules/psp.smk b/rules/psp.smk index 9a3e4af..a959cf4 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -337,7 +337,7 @@ rule build_pars_psp: rule build_psp: input: - raw_file=get_pattern_tier_raw(setup), + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), pars_file=ancient( lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, "psp" diff --git a/rules/raw.smk b/rules/raw.smk index 20d1105..a81520a 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -43,7 +43,9 @@ rule build_raw_blind: and runs only if the blinding check file is on disk. Output is just the blinded raw file. 
""" input: - tier_file=get_pattern_tier_raw(setup).replace("{datatype}", "phy"), + tier_file=get_pattern_tier(setup, "raw", check_in_cycle=False).replace( + "{datatype}", "phy" + ), blind_file=get_blinding_curve_file, params: timestamp="{timestamp}", diff --git a/rules/tcm.smk b/rules/tcm.smk index 657cda3..c1164bb 100644 --- a/rules/tcm.smk +++ b/rules/tcm.smk @@ -3,7 +3,6 @@ Snakemake file containing the rules for generating the tcm """ from scripts.util.patterns import ( - get_pattern_tier_raw, get_pattern_tier, get_pattern_log, get_pattern_pars_tmp_channel, @@ -14,7 +13,7 @@ from scripts.util.patterns import ( # This rule builds the tcm files each raw file rule build_tier_tcm: input: - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), params: timestamp="{timestamp}", datatype="{datatype}", From 1698eb1561a8a49d9fd154688f3e01cda8c2cdee Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 15:30:19 +0100 Subject: [PATCH 008/101] add debug mode functionality --- scripts/pars_hit_aoe.py | 4 ++++ scripts/pars_hit_ecal.py | 4 ++++ scripts/pars_hit_lq.py | 4 ++++ scripts/pars_pht_aoecal.py | 4 ++++ scripts/pars_pht_fast.py | 2 ++ scripts/pars_pht_lqcal.py | 4 ++++ scripts/pars_pht_partcal.py | 8 +++++++- 7 files changed, 29 insertions(+), 1 deletion(-) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index ed33f23..be40ed5 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -66,6 +66,7 @@ def aoe_calibration( dt_param: str = "dt_eff", comptBands_width: int = 20, plot_options: dict | None = None, + debug_mode: bool = False, ): data["AoE_Uncorr"] = data[current_param] / data[energy_param] aoe = CalAoE( @@ -82,6 +83,7 @@ def aoe_calibration( mean_func=mean_func, sigma_func=sigma_func, compt_bands_width=comptBands_width, + debug_mode=debug_mode | args.debug, ) aoe.update_cal_dicts( @@ -116,6 +118,8 @@ def aoe_calibration( argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--aoe_results", help="aoe_results", type=str) + +argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index d19b427..f7b8be3 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -439,6 +439,8 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) argparser.add_argument("--save_path", help="save_path", type=str) argparser.add_argument("--results_path", help="results_path", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -565,6 +567,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): glines, guess, kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False) | args.debug, ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, etol_kev=5 if det_status == "on" else 20 @@ -575,6 +578,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): glines, guess, kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False), ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, 
etol_kev=5 if det_status == "on" else 30, n_sigma=2 diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 5a0ad96..da83623 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -54,6 +54,7 @@ def lq_calibration( cdf: callable = gaussian, selection_string: str = "", plot_options: dict | None = None, + debug_mode: bool = False, ): """Loads in data from the provided files and runs the LQ calibration on said files @@ -99,6 +100,7 @@ def lq_calibration( eres_func, cdf, selection_string, + debug_mode=debug_mode | args.debug, ) data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) @@ -136,6 +138,8 @@ def lq_calibration( argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--lq_results", help="lq_results", type=str) + +argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index bf91d38..8fb2b36 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -92,6 +92,7 @@ def aoe_calibration( dt_param: str = "dt_eff", comptBands_width: int = 20, plot_options: dict | None = None, + debug_mode: bool = False, ): data["AoE_Uncorr"] = data[current_param] / data[energy_param] aoe = CalAoE( @@ -108,6 +109,7 @@ def aoe_calibration( mean_func=mean_func, sigma_func=sigma_func, compt_bands_width=comptBands_width, + debug_mode=debug_mode | args.debug, ) aoe.update_cal_dicts( { @@ -263,6 +265,8 @@ def eres_func(x): argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--aoe_results", help="aoe_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 8210df7..6ab1a4b 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -66,6 +66,8 @@ def run_splitter(files): argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 3d5915e..890554f 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -75,6 +75,7 @@ def lq_calibration( cdf: callable = gaussian, selection_string: str = "", plot_options: dict | None = None, + debug_mode: bool = False, ): """Loads in data from the provided files and runs the LQ calibration on said files @@ -119,6 +120,7 @@ def lq_calibration( eres_func, cdf, selection_string, + debug_mode=debug_mode | args.debug, ) data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) @@ -259,6 +261,8 @@ def eres_func(x): argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) 
argparser.add_argument("--lq_results", help="lq_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 0d74ac8..b6f12d7 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -218,7 +218,11 @@ def calibrate_partition( for energy_param, cal_energy_param in zip(kwarg_dict["energy_params"], cal_energy_params): energy = data.query(selection_string)[energy_param].to_numpy() full_object_dict[cal_energy_param] = HPGeCalibration( - energy_param, glines, 1, kwarg_dict.get("deg", 0) # , fixed={1: 1} + energy_param, + glines, + 1, + kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False) | args.debug, # , fixed={1: 1} ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( energy, @@ -426,6 +430,8 @@ def calibrate_partition( argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") From b8404444ee8fab5fbac4f871f6c8f535906c82d3 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 18:02:08 +0100 Subject: [PATCH 009/101] os to pathlib.Path --- .ruff.toml | 2 +- scripts/blinding_calibration.py | 3 +- scripts/build_dsp.py | 13 +- scripts/build_evt.py | 5 +- scripts/build_hit.py | 11 +- scripts/build_raw.py | 10 +- scripts/build_raw_blind.py | 9 +- scripts/build_skm.py | 5 +- scripts/build_tcm.py | 7 +- scripts/check_blinding.py | 9 +- scripts/complete_run.py | 49 +++--- scripts/create_chankeylist.py | 7 +- scripts/merge_channels.py | 35 ++-- scripts/par_psp.py | 18 +- scripts/pars_dsp_build_svm.py | 3 +- scripts/pars_dsp_dplms.py | 17 +- scripts/pars_dsp_eopt.py | 17 +- scripts/pars_dsp_event_selection.py | 15 +- scripts/pars_dsp_nopt.py | 13 +- scripts/pars_dsp_svm.py | 9 +- scripts/pars_dsp_tau.py | 13 +- scripts/pars_hit_aoe.py | 21 ++- scripts/pars_hit_ecal.py | 21 +-- scripts/pars_hit_lq.py | 21 ++- scripts/pars_hit_qc.py | 11 +- scripts/pars_pht_aoecal.py | 37 ++-- scripts/pars_pht_fast.py | 39 ++-- scripts/pars_pht_lqcal.py | 41 +++-- scripts/pars_pht_partcal.py | 35 ++-- scripts/pars_pht_qc.py | 19 +- scripts/pars_pht_qc_phy.py | 13 +- scripts/pars_tcm_pulser.py | 7 +- scripts/util/FileKey.py | 6 +- scripts/util/cal_grouping.py | 25 ++- scripts/util/catalog.py | 2 +- scripts/util/create_pars_keylist.py | 8 +- scripts/util/pars_loading.py | 11 +- scripts/util/patterns.py | 264 +++++++++++++++------------- scripts/util/utils.py | 6 +- tests/test_util.py | 19 +- 40 files changed, 431 insertions(+), 445 deletions(-) diff --git a/.ruff.toml b/.ruff.toml index 29f8014..8b4d420 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -12,7 +12,7 @@ lint.select = [ "PIE", # flake8-pie "PL", # pylint "PT", # flake8-pytest-style - # "PTH", # flake8-use-pathlib + "PTH", # flake8-use-pathlib "RET", # flake8-return "RUF", # Ruff-specific "SIM", # flake8-simplify diff --git a/scripts/blinding_calibration.py b/scripts/blinding_calibration.py index 6a1b0a7..62207e9 100644 --- a/scripts/blinding_calibration.py +++ b/scripts/blinding_calibration.py @@ -7,6 +7,7 
@@ import argparse import logging import pickle as pkl +from pathlib import Path import matplotlib as mpl import matplotlib.pyplot as plt @@ -93,7 +94,7 @@ ax2.set_xlabel("energy (keV)") ax2.set_ylabel("counts") plt.suptitle(args.channel) -with open(args.plot_file, "wb") as w: +with Path(args.plot_file).open("wb") as w: pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) plt.close() diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index cbd0794..02bf6a1 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -1,6 +1,5 @@ import argparse import logging -import os import pathlib import re import time @@ -37,7 +36,7 @@ def replace_list_with_array(dic): argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) +pathlib.Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) @@ -51,13 +50,13 @@ def replace_list_with_array(dic): channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} db_files = [ - par_file for par_file in args.pars_file if os.path.splitext(par_file)[1] in (".json", ".yaml") + par_file for par_file in args.pars_file if pathlib.Path(par_file).suffix in (".json", ".yaml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) database_dic = replace_list_with_array(database_dic) -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +pathlib.Path(args.output).parent.mkdir(parents=True, exist_ok=True) rng = np.random.default_rng() rand_num = f"{rng.integers(0, 99999):05d}" @@ -78,9 +77,9 @@ def replace_list_with_array(dic): log.info(f"build_dsp finished in {time.time()-start}") -os.rename(temp_output, args.output) +pathlib.Path(temp_output).rename(args.output) -key = os.path.basename(args.output).replace("-tier_dsp.lh5", "") +key = pathlib.Path(args.output).name.replace("-tier_dsp.lh5", "") raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] @@ -109,5 +108,5 @@ def replace_list_with_array(dic): }, "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, } -pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) +pathlib.Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.db_file, full_dict) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 3d993d8..6927c24 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -1,7 +1,6 @@ import argparse import json import logging -import os import time from pathlib import Path @@ -51,7 +50,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): args = argparser.parse_args() if args.log is not None: - Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") else: logging.basicConfig(level=logging.DEBUG) @@ -118,7 +117,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): log.debug(json.dumps(evt_config["channels"], indent=2)) t_start = time.time() -Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) table = build_evt( { diff --git a/scripts/build_hit.py b/scripts/build_hit.py index c550337..8e2da80 100644 --- 
a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -1,8 +1,7 @@ import argparse import logging -import os -import pathlib import time +from pathlib import Path from legendmeta import TextDB from legendmeta.catalog import Props @@ -24,7 +23,7 @@ argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) @@ -59,7 +58,7 @@ hit_dict[f"{channel}/dsp"] = chan_pars t_start = time.time() -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) build_hit(args.input, lh5_tables_config=hit_dict, outfile=args.output) t_elap = time.time() - t_start log.info(f"Done! Time elapsed: {t_elap:.2f} sec.") @@ -80,12 +79,12 @@ } hit_channels.append(channel) -key = os.path.basename(args.output).replace(f"-tier_{args.tier}.lh5", "") +key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") full_dict = { "valid_fields": {args.tier: hit_outputs}, "valid_keys": {key: {"valid_channels": {args.tier: hit_channels}}}, } -pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) +Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.db_file, full_dict) diff --git a/scripts/build_raw.py index c02b67b..03a4fca 100644 --- a/scripts/build_raw.py +++ b/scripts/build_raw.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import numpy as np from daq2lh5 import build_raw @@ -18,10 +17,10 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"][ @@ -83,4 +82,5 @@ build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) -os.rename(temp_output, args.output) +# rename the temp file +Path(temp_output).rename(args.output) diff --git a/scripts/build_raw_blind.py index 0400f22..33a6c31 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -12,8 +12,7 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import numexpr as ne import numpy as np @@ -35,11 +34,11 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") logging.getLogger("lgdo").setLevel(logging.INFO) -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) channel_dict = configs.on(args.timestamp, system=args.datatype) @@ -167,4 +166,4 @@ ) # rename the temp file -os.rename(temp_output,
args.output) +Path(temp_output).rename(args.output) diff --git a/scripts/build_skm.py index a327caa..10bf876 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import awkward as ak from legendmeta import TextDB @@ -32,7 +31,7 @@ def get_all_out_fields(input_table, out_fields, current_field=""): args = argparser.parse_args() if args.log is not None: - pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/build_tcm.py index c39faea..2ceb3ab 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -21,7 +20,7 @@ logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) channel_dict = configs["snakemake_rules"]["tier_tcm"]["inputs"] @@ -50,4 +49,4 @@ **settings, ) -os.rename(temp_output, args.output) +Path(temp_output).rename(args.output) diff --git a/scripts/check_blinding.py index 4d8a6fa..7d6da04 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -8,9 +8,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl +from pathlib import Path import matplotlib as mpl import matplotlib.pyplot as plt @@ -40,7 +39,7 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) @@ -85,7 +84,7 @@ ax2.set_xlabel("energy (keV)") ax2.set_ylabel("counts") plt.suptitle(args.channel) -with open(args.plot_file, "wb") as w: +with Path(args.plot_file).open("wb") as w: pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) plt.close() @@ -93,7 +92,7 @@ # valid and if so create file else raise error.
if detector is in ac mode it # will always pass this check if np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) or det_status is False: - pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) + Path(args.output).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output, {}) else: msg = "peaks not found in daqenergy" diff --git a/scripts/complete_run.py index f61ba37..fe800e8 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -1,7 +1,6 @@ # ruff: noqa: F821, T201 import datetime -import glob import json import os import time @@ -20,14 +19,14 @@ def as_ro(path): def check_log_files(log_path, output_file, gen_output, warning_file=None): now = datetime.datetime.now(datetime.UTC).strftime("%d/%m/%y %H:%M") - os.makedirs(os.path.dirname(output_file), exist_ok=True) + Path(output_file).parent.mkdir(parents=True, exist_ok=True) if warning_file is not None: - os.makedirs(os.path.dirname(warning_file), exist_ok=True) - with open(warning_file, "w") as w, open(output_file, "w") as f: + Path(warning_file).parent.mkdir(parents=True, exist_ok=True) + with Path(warning_file).open("w") as w, Path(output_file).open("w") as f: n_errors = 0 n_warnings = 0 for file in Path(log_path).rglob("*.log"): - with open(file) as r: + with Path(file).open() as r: text = r.read() if "ERROR" in text or "WARNING" in text: for line in text.splitlines(): @@ -40,24 +39,24 @@ def check_log_files(log_path, output_file, gen_output, warning_file=None): w.write( f"{gen_output} successfully generated at {now} with warnings \n" ) - f.write(f"{os.path.basename(file)} : {line}\n") + f.write(f"{Path(file).name} : {line}\n") n_errors += 1 elif "WARNING" in line: - w.write(f"{os.path.basename(file)} : {line}\n") + w.write(f"{Path(file).name} : {line}\n") n_warnings += 1 else: pass - os.remove(file) + Path(file).unlink() text = None if n_errors == 0: f.write(f"{gen_output} successfully generated at {now} with no errors \n") if n_warnings == 0: w.write(f"{gen_output} successfully generated at {now} with no warnings \n") else: - with open(output_file, "w") as f: + with Path(output_file).open("w") as f: n_errors = 0 for file in Path(log_path).rglob("*.log"): - with open(file) as r: + with Path(file).open() as r: text = r.read() if "ERROR" in text: for line in text.splitlines(): @@ -66,18 +65,18 @@ def check_log_files(log_path, output_file, gen_output, warning_file=None): f.write( f"{gen_output} successfully generated at {now} with errors \n" ) - f.write(f"{os.path.basename(file)} : {line}\n") + f.write(f"{Path(file).name} : {line}\n") n_errors += 1 else: pass - os.remove(file) + Path(file).unlink() text = None if n_errors == 0: f.write(f"{gen_output} successfully generated at {now} with no errors \n") walk = list(os.walk(log_path)) for path, _, _ in walk[::-1]: if len(os.listdir(path)) == 0: - os.rmdir(path) + Path(path).rmdir() def add_spaces(n): @@ -124,7 +123,7 @@ def get_run(Filekey): key_dict = {} for file in files: - key = FileKey.get_filekey_from_filename(os.path.basename(file)) + key = FileKey.get_filekey_from_filename(Path(file).name) if get_run(key) in key_dict: key_dict[get_run(key)].append(file) else: @@ -133,24 +132,24 @@ def get_run(Filekey): def build_valid_keys(input_files, output_dir): - infiles = glob.glob(as_ro(input_files)) + infiles = list(Path(as_ro(input_files)).parent.glob(Path(as_ro(input_files)).name)) key_dict = get_keys(infiles) for key in list(key_dict): dtype = key.split("-")[-1] - out_file = os.path.join(output_dir, f'{key.replace(f"-{dtype}",
"")}-valid_{dtype}.json') - Path(os.path.dirname(out_file)).mkdir(parents=True, exist_ok=True) - if os.path.isfile(out_file): + out_file = Path(output_dir) / f'{key.replace(f"-{dtype}", "")}-valid_{dtype}.json' + out_file.parent.mkdir(parents=True, exist_ok=True) + if Path(out_file).is_file(): out_dict = Props.read_from([out_file] + key_dict[key]) else: out_dict = Props.read_from(key_dict[key]) out_string = readable_json(out_dict) - with open(out_file, "w") as w: + with Path(out_file).open("w") as w: w.write(out_string) for input_file in infiles: - if os.path.isfile(input_file): - os.remove(input_file) + if Path(input_file).is_file(): + Path(input_file).unlink() def find_gen_runs(gen_tier_path): @@ -268,16 +267,16 @@ def fformat(tier): if snakemake.wildcards.tier != "daq": print(f"INFO: ...building FileDBs with {snakemake.threads} threads") - os.makedirs(snakemake.params.filedb_path, exist_ok=True) + Path(snakemake.params.filedb_path).mkdir(parents=True, exist_ok=True) - with open(os.path.join(snakemake.params.filedb_path, "file_db_config.json"), "w") as f: + with (Path(snakemake.params.filedb_path) / "file_db_config.json").open("w") as f: json.dump(file_db_config, f, indent=2) build_file_dbs(ut.tier_path(snakemake.params.setup), snakemake.params.filedb_path) - os.remove(os.path.join(snakemake.params.filedb_path, "file_db_config.json")) + (Path(snakemake.params.filedb_path) / "file_db_config.json").unlink() build_valid_keys( - os.path.join(ut.tmp_par_path(snakemake.params.setup), "*_db.json"), + Path(ut.tmp_par_path(snakemake.params.setup)) / "*_db.json", snakemake.params.valid_keys_path, ) diff --git a/scripts/create_chankeylist.py index 435f55c..6ed4510 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -1,6 +1,5 @@ import argparse -import os -import pathlib +from pathlib import Path from legendmeta import LegendMetadata, TextDB @@ -25,7 +24,7 @@ if status_map[chan]["processable"] is True and chmap[chan].system == "geds" ] -pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) -with open(args.output_file, "w") as f: +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) +with Path(args.output_file).open("w") as f: for chan in channels: f.write(f"{chan}\n") diff --git a/scripts/merge_channels.py index a86d47d..e8994be 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -1,8 +1,7 @@ import argparse -import os -import pathlib import pickle as pkl import shelve +from pathlib import Path import numpy as np from legendmeta.catalog import Props @@ -19,7 +18,7 @@ def replace_path(d, old_path, new_path): d[i] = replace_path(d[i], old_path, new_path) elif isinstance(d, str) and old_path in d: d = d.replace(old_path, new_path) - d = d.replace(new_path, f"$_/{os.path.basename(new_path)}") + d = d.replace(new_path, f"$_/{Path(new_path).name}") return d @@ -45,25 +44,25 @@ def replace_path(d, old_path, new_path): channel_files = args.input.infiles if hasattr(args.input, "infiles") else args.input -file_extension = pathlib.Path(args.output).suffix +file_extension = Path(args.output).suffix if file_extension == ".dat" or file_extension == ".dir": - out_file = os.path.splitext(args.output)[0] + out_file = Path(args.output).with_suffix("") else: out_file = args.output rng = np.random.default_rng() temp_output = f"{out_file}.{rng.integers(0, 99999):05d}" -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True)
+Path(args.output).parent.mkdir(parents=True, exist_ok=True) if file_extension == ".json" or file_extension == ".yaml" or file_extension == ".yml": out_dict = {} for channel in channel_files: - if pathlib.Path(channel).suffix == file_extension: + if Path(channel).suffix == file_extension: channel_dict = Props.read_from(channel) - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel out_dict[channel_name] = channel_dict else: @@ -72,29 +71,29 @@ def replace_path(d, old_path, new_path): Props.write_to(temp_output, out_dict, "json") - os.rename(temp_output, out_file) + Path(temp_output).rename(out_file) elif file_extension == ".pkl": out_dict = {} for channel in channel_files: - with open(channel, "rb") as r: + with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel out_dict[channel_name] = channel_dict - with open(temp_output, "wb") as w: + with Path(temp_output).open("wb") as w: pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - os.rename(temp_output, out_file) + Path(temp_output).rename(out_file) elif file_extension == ".dat" or file_extension == ".dir": common_dict = {} with shelve.open(out_file, "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: for channel in channel_files: - with open(channel, "rb") as r: + with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel if isinstance(channel_dict, dict) and "common" in list(channel_dict): chan_common_dict = channel_dict.pop("common") @@ -108,8 +107,8 @@ def replace_path(d, old_path, new_path): if args.in_db: db_dict = Props.read_from(args.in_db) for channel in channel_files: - if pathlib.Path(channel).suffix == file_extension: - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + if Path(channel).suffix == file_extension: + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel tb_in = lh5.read(f"{channel_name}", channel) @@ -128,4 +127,4 @@ def replace_path(d, old_path, new_path): if args.out_db: Props.write_to(args.out_db, db_dict) - os.rename(temp_output, out_file) + Path(temp_output).rename(out_file) diff --git a/scripts/par_psp.py b/scripts/par_psp.py index 52c2ed6..94473a0 100644 --- a/scripts/par_psp.py +++ b/scripts/par_psp.py @@ -1,7 +1,7 @@ import argparse -import os import pickle as pkl from datetime import datetime +from pathlib import Path import matplotlib as mpl import matplotlib.dates as mdates @@ -44,7 +44,7 @@ # partitions could be different for different channels - do separately for each channel in_dicts = {} for file in args.input: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp in_dicts[tstamp] = Props.read_from(file) plot_dict = {} @@ -109,36 +109,36 @@ plt.close() for file in args.output: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp Props.write_to(file, in_dicts[tstamp]) if args.out_plots: for file in args.out_plots: - tstamp = 
ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp if args.in_plots: for infile in args.in_plots: if tstamp in infile: - with open(infile, "rb") as f: + with Path(infile).open("rb") as f: old_plot_dict = pkl.load(f) break old_plot_dict.update({"psp": plot_dict}) new_plot_dict = old_plot_dict else: new_plot_dict = {"psp": plot_dict} - with open(file, "wb") as f: + with Path(file).open("wb") as f: pkl.dump(new_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) if args.out_obj: for file in args.out_obj: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp if args.in_obj: for infile in args.in_obj: if tstamp in infile: - with open(infile, "rb") as f: + with Path(infile).open("rb") as f: old_obj_dict = pkl.load(f) break new_obj_dict = old_obj_dict else: new_obj_dict = {} - with open(file, "wb") as f: + with Path(file).open("wb") as f: pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index df97320..0d6ada7 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -1,6 +1,7 @@ import argparse import logging import pickle as pkl +from pathlib import Path from legendmeta.catalog import Props from lgdo import lh5 @@ -45,5 +46,5 @@ log.debug("trained model") # Save trained model with pickle -with open(args.output_file, "wb") as svm_file: +with Path(args.output_file).open("wb") as svm_file: pkl.dump(svm, svm_file, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index f643e03..607613c 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -1,9 +1,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -52,7 +51,7 @@ db_dict = Props.read_from(args.database) if dplms_dict["run_dplms"] is True: - with open(args.fft_raw_filelist) as f: + with Path(args.fft_raw_filelist).open() as f: fft_files = sorted(f.read().splitlines()) t0 = time.time() @@ -91,7 +90,7 @@ display=1, ) if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: inplot_dict = pkl.load(r) inplot_dict.update({"dplms": plot_dict}) @@ -115,14 +114,14 @@ out_dict = {} dplms_pars = Table(col_dict={"coefficients": Array([])}) if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: inplot_dict = pkl.load(r) else: inplot_dict = {} db_dict.update(out_dict) -pathlib.Path(os.path.dirname(args.lh5_path)).mkdir(parents=True, exist_ok=True) +Path(args.lh5_path).parent.mkdir(parents=True, exist_ok=True) sto.write( Table(col_dict={"dplms": dplms_pars}), name=args.channel, @@ -130,10 +129,10 @@ wo_mode="overwrite", ) -pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.dsp_pars, db_dict) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 0edf617..bcda090 100644 --- 
a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -1,10 +1,9 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time import warnings +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -351,19 +350,19 @@ else: db_dict.update({"ctc_params": out_alpha_dict}) - pathlib.Path(os.path.dirname(args.qbb_grid_path)).mkdir(parents=True, exist_ok=True) - with open(args.qbb_grid_path, "wb") as f: + Path(args.qbb_grid_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.qbb_grid_path).open("wb") as f: pkl.dump(optimisers, f) else: - pathlib.Path(args.qbb_grid_path).touch() + Path(args.qbb_grid_path).touch() -pathlib.Path(os.path.dirname(args.final_dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.final_dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.final_dsp_pars, db_dict) if args.plot_path: if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: plot_dict = pkl.load(r) else: plot_dict = {} @@ -383,6 +382,6 @@ "acq_space": bopt_zac.plot_acq(init_samples=sample_x), } - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as w: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as w: pkl.dump(plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index ea2bb34..2e6505b 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -1,11 +1,10 @@ import argparse import json import logging -import os -import pathlib import time import warnings from bisect import bisect_left +from pathlib import Path import lgdo import lgdo.lh5 as lh5 @@ -121,14 +120,14 @@ def get_out_data( peak_dict = Props.read_from(peak_json) db_dict = Props.read_from(args.decay_const) - pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) + Path(args.peak_file).parent.mkdir(parents=True, exist_ok=True) if peak_dict.pop("run_selection") is True: log.debug("Starting peak selection") rng = np.random.default_rng() rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.peak_file}.{rand_num}" - with open(args.raw_filelist) as f: + with Path(args.raw_filelist).open() as f: files = f.read().splitlines() raw_files = sorted(files) @@ -138,7 +137,7 @@ def get_out_data( elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -225,7 +224,7 @@ def get_out_data( } for file in raw_files: - log.debug(os.path.basename(file)) + log.debug(Path(file).name) for peak, peak_dict in pk_dicts.items(): if peak_dict["idxs"] is not None: # idx is a long continuous array @@ -358,7 +357,7 @@ def get_out_data( log.debug(f"{peak} has reached the required number of events") else: - pathlib.Path(temp_output).touch() + Path(temp_output).touch() log.debug(f"event selection completed in {time.time()-t0} seconds") - os.rename(temp_output, args.peak_file) + Path(temp_output).rename(args.peak_file) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 67ffd5f..47261d2 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -1,9 +1,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time +from pathlib import Path 
import lgdo.lh5 as lh5 import numpy as np @@ -57,7 +56,7 @@ db_dict = Props.read_from(args.database) if opt_dict.pop("run_nopt") is True: - with open(args.raw_filelist) as f: + with Path(args.raw_filelist).open() as f: files = f.read().splitlines() raw_files = sorted(files) @@ -96,15 +95,15 @@ plot_dict = {} if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: old_plot_dict = pkl.load(r) plot_dict = dict(noise_optimisation=plot_dict, **old_plot_dict) else: plot_dict = {"noise_optimisation": plot_dict} - with open(args.plot_path, "wb") as f: + with Path(args.plot_path).open("wb") as f: pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) -pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.dsp_pars, dict(nopt_pars=out_dict, **db_dict)) diff --git a/scripts/pars_dsp_svm.py b/scripts/pars_dsp_svm.py index 28b335e..370e320 100644 --- a/scripts/pars_dsp_svm.py +++ b/scripts/pars_dsp_svm.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path from legendmeta.catalog import Props @@ -14,7 +13,7 @@ if args.log is not None: - pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") else: logging.basicConfig(level=logging.DEBUG) @@ -27,9 +26,9 @@ par_data = Props.read_from(args.input_file) -file = f"'$_/{os.path.basename(args.svm_file)}'" +file = f"'$_/{Path(args.svm_file).name}'" par_data["svm"] = {"model_file": file} -pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output_file, par_data) diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index c4750c6..82cec2d 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -1,8 +1,7 @@ import argparse import logging -import os -import pathlib import pickle as pkl +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -52,7 +51,7 @@ kwarg_dict.pop("run_tau") if isinstance(args.raw_files, list) and args.raw_files[0].split(".")[-1] == "filelist": input_file = args.raw_files[0] - with open(input_file) as f: + with Path(input_file).open() as f: input_file = f.read().splitlines() else: input_file = args.raw_files @@ -63,7 +62,7 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -113,17 +112,17 @@ tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) plot_dict = tau.plot_waveforms_after_correction( tb_data, "wf_pz", norm_param=kwarg_dict.get("norm_param", "pz_mean") ) plot_dict.update(tau.plot_slopes(slopes[idxs])) - with open(args.plot_path, "wb") as f: + with Path(args.plot_path).open("wb") as f: pkl.dump({"tau": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) else: out_dict = {} 
-pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output_file, tau.output_dict) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index be40ed5..a393868 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -2,10 +2,9 @@ import argparse import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path from typing import Callable import numpy as np @@ -142,7 +141,7 @@ def aoe_calibration( cal_dict = ecal_dict["pars"] eres_dict = ecal_dict["results"]["ecal"] -with open(args.eres_file, "rb") as o: +with Path(args.eres_file).open("rb") as o: object_dict = pkl.load(o) if kwarg_dict["run_aoe"] is True: @@ -158,7 +157,7 @@ def aoe_calibration( for field, item in kwarg_dict["plot_options"].items(): kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) @@ -210,7 +209,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -246,7 +245,7 @@ def eres_func(x): if args.plot_file: common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: out_plot_dict = pkl.load(r) out_plot_dict.update({"aoe": plot_dict}) else: @@ -257,11 +256,11 @@ def eres_func(x): elif common_dict is not None: out_plot_dict["common"] = common_dict - pathlib.Path(os.path.dirname(args.plot_file)).mkdir(parents=True, exist_ok=True) - with open(args.plot_file, "wb") as w: + Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_file).open("wb") as w: pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) -pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) +Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) results_dict = dict(**ecal_dict["results"], aoe=out_dict) final_hit_dict = { "pars": {"operations": cal_dict}, @@ -269,10 +268,10 @@ def eres_func(x): } Props.write_to(args.hit_pars, final_hit_dict) -pathlib.Path(os.path.dirname(args.aoe_results)).mkdir(parents=True, exist_ok=True) +Path(args.aoe_results).parent.mkdir(parents=True, exist_ok=True) final_object_dict = dict( **object_dict, aoe=obj, ) -with open(args.aoe_results, "wb") as w: +with Path(args.aoe_results).open("wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index f7b8be3..b310500 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import warnings from datetime import datetime +from pathlib import Path import lgdo.lh5 as lh5 import matplotlib as mpl @@ -462,9 +461,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): db_files = [ par_file for par_file in args.ctc_dict - if os.path.splitext(par_file)[1] == ".json" - or os.path.splitext(par_file)[1] == ".yml" - or os.path.splitext(par_file)[1] == ".yaml" + if Path(par_file).suffix in (".json", ".yml", ".yaml") ] database_dic = Props.read_from(db_files) @@ -493,7 
+490,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): bl_plots[field]["function"] = eval(item["function"]) common_plots = kwarg_dict.pop("common_plots") - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) @@ -514,7 +511,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -725,7 +722,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): common_dict.update({key: param_dict}) if args.inplot_dict: - with open(args.inplot_dict, "rb") as f: + with Path(args.inplot_dict).open("rb") as f: total_plot_dict = pkl.load(f) else: total_plot_dict = {} @@ -737,8 +734,8 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): total_plot_dict.update({"ecal": plot_dict}) - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump(total_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) # save output dictionary @@ -746,6 +743,6 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): Props.write_to(args.save_path, output_dict) # save calibration objects - with open(args.results_path, "wb") as fp: - pathlib.Path(os.path.dirname(args.results_path)).mkdir(parents=True, exist_ok=True) + with Path(args.results_path).open("wb") as fp: + Path(args.results_path).parent.mkdir(parents=True, exist_ok=True) pkl.dump({"ecal": full_object_dict}, fp, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index da83623..579b34a 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -2,10 +2,9 @@ import argparse import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -160,7 +159,7 @@ def lq_calibration( cal_dict = ecal_dict["pars"]["operations"] eres_dict = ecal_dict["results"]["ecal"] -with open(args.eres_file, "rb") as o: +with Path(args.eres_file).open("rb") as o: object_dict = pkl.load(o) if kwarg_dict["run_lq"] is True: @@ -172,7 +171,7 @@ def lq_calibration( for field, item in kwarg_dict["plot_options"].items(): kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) @@ -213,7 +212,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -247,7 +246,7 @@ def eres_func(x): if args.plot_file: common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: out_plot_dict = pkl.load(r) out_plot_dict.update({"lq": plot_dict}) else: @@ -258,24 +257,24 @@ def eres_func(x): elif common_dict is not None: out_plot_dict["common"] = common_dict - pathlib.Path(os.path.dirname(args.plot_file)).mkdir(parents=True, 
exist_ok=True) - with open(args.plot_file, "wb") as w: + Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_file).open("wb") as w: pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) results_dict = dict(**eres_dict, lq=out_dict) -pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) +Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) final_hit_dict = { "pars": {"operations": cal_dict}, "results": results_dict, } Props.write_to(args.hit_pars, final_hit_dict) -pathlib.Path(os.path.dirname(args.lq_results)).mkdir(parents=True, exist_ok=True) +Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) final_object_dict = dict( **object_dict, lq=obj, ) Props.write_to(args.lq_results, final_object_dict) -with open(args.lq_results, "wb") as w: +with Path(args.lq_results).open("wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 9640087..5311c46 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -3,11 +3,10 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np from legendmeta import LegendMetadata @@ -160,7 +159,7 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -226,10 +225,10 @@ hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal} plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal} - pathlib.Path(os.path.dirname(args.save_path)).mkdir(parents=True, exist_ok=True) + Path(args.save_path).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.save_path, hit_dict) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index 8fb2b36..e9573e3 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path from typing import Callable import numpy as np @@ -32,7 +31,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -289,33 +288,33 @@ def eres_func(x): for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if 
args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run if isinstance(args.input_files, list): files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() else: - with open(args.input_files) as f: + with Path(args.input_files).open() as f: files = f.read().splitlines() files = sorted( @@ -325,7 +324,7 @@ def eres_func(x): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -369,7 +368,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -403,21 +402,21 @@ def eres_func(x): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": cal_dict[fk.timestamp], "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) + Path(out).parent.mkdir(parents=True, exist_ok=True) Props.write_to(out, final_hit_dict) for out in args.aoe_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 6ab1a4b..4064b3c 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -3,10 +3,9 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -32,7 +31,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -83,29 +82,29 @@ def run_splitter(files): for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with 
open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() files = sorted( @@ -115,7 +114,7 @@ def run_splitter(files): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -179,7 +178,7 @@ def run_splitter(files): if args.pulser_files: mask = np.array([], dtype=bool) for file in args.pulser_files: - with open(file) as f: + with Path(file).open() as f: pulser_dict = json.load(f) pulser_mask = np.array(pulser_dict["mask"]) mask = np.append(mask, pulser_mask) @@ -188,7 +187,7 @@ def run_splitter(files): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -249,22 +248,22 @@ def run_splitter(files): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": {"operations": cal_dict[fk.timestamp]}, "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "w") as w: + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("w") as w: json.dump(final_hit_dict, w, indent=4) for out in args.fit_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 890554f..2ba88af 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -4,10 +4,9 @@ import copy import json import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -32,7 +31,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = 
ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -285,33 +284,33 @@ def eres_func(x): for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run if isinstance(args.input_files, list): files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() else: - with open(args.input_files) as f: + with Path(args.input_files).open() as f: files = f.read().splitlines() files = sorted( @@ -321,7 +320,7 @@ def eres_func(x): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -348,7 +347,7 @@ def eres_func(x): if args.pulser_files: mask = np.array([], dtype=bool) for file in args.pulser_files: - with open(file) as f: + with Path(file).open() as f: pulser_dict = json.load(f) pulser_mask = np.array(pulser_dict["mask"]) mask = np.append(mask, pulser_mask) @@ -357,7 +356,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -391,22 +390,22 @@ def eres_func(x): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": {"operations": cal_dict[fk.timestamp]}, "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "w") as w: + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("w") as w: json.dump(final_hit_dict, w, indent=4) for out in args.lq_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, 
exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index b6f12d7..a6eab18 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -34,7 +33,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -447,29 +446,29 @@ def calibrate_partition( for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() files = sorted( @@ -479,7 +478,7 @@ def calibrate_partition( final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -518,7 +517,7 @@ def calibrate_partition( elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -553,21 +552,21 @@ def calibrate_partition( if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": cal_dict[fk.timestamp], "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) + Path(out).parent.mkdir(parents=True, exist_ok=True) Props.write_to(out, final_hit_dict) for out in args.fit_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - 
pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index f62da8b..790ee0a 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -3,11 +3,10 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np from legendmeta import LegendMetadata @@ -72,10 +71,10 @@ if isinstance(args.cal_files, list): cal_files = [] for file in args.cal_files: - with open(file) as f: + with Path(file).open() as f: cal_files += f.read().splitlines() else: - with open(args.cal_files) as f: + with Path(args.cal_files).open() as f: cal_files = f.read().splitlines() cal_files = sorted( @@ -99,10 +98,10 @@ if isinstance(args.fft_files, list): fft_files = [] for file in args.fft_files: - with open(file) as f: + with Path(file).open() as f: fft_files += f.read().splitlines() else: - with open(args.fft_files) as f: + with Path(args.fft_files).open() as f: fft_files = f.read().splitlines() fft_files = sorted( @@ -223,7 +222,7 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, total_mask = get_tcm_pulser_ids( @@ -305,11 +304,11 @@ plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal} for file in args.save_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + Path(file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(file, hit_dict) if args.plot_path: for file in args.plot_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) - with open(file, "wb") as f: + Path(file).parent.mkdir(parents=True, exist_ok=True) + with Path(file).open("wb") as f: pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 10af322..48f3d9f 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -3,11 +3,10 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -64,7 +63,7 @@ if isinstance(args.phy_files, list): phy_files = [] for file in sorted(args.phy_files): - with open(file) as f: + with Path(file).open() as f: run_files = f.read().splitlines() if len(run_files) == 0: continue @@ -78,7 +77,7 @@ ) bl_mask = np.append(bl_mask, bl_idxs) else: - with open(args.phy_files) as f: + with Path(args.phy_files).open() as f: phy_files = f.read().splitlines() phy_files = sorted(np.unique(phy_files)) bls = sto.read("ch1027200/dsp/", phy_files, field_mask=["wf_max", "bl_mean"])[0] @@ -147,11 +146,11 @@ log.debug(f"cut_dict is: {json.dumps(hit_dict, indent=2)}") for file in args.save_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + Path(file).name.mkdir(parents=True, exist_ok=True) Props.write_to(file, {"pars": {"operations": hit_dict}}) if args.plot_path: for file in args.plot_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) - with open(file, "wb") as f: + 
Path(file).parent.mkdir(parents=True, exist_ok=True) + with Path(file).open("wb") as f: pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index f72a04a..27c1101 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -41,7 +40,7 @@ if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": tcm_files = args.tcm_files[0] - with open(tcm_files) as f: + with Path(tcm_files).open() as f: tcm_files = f.read().splitlines() else: tcm_files = args.tcm_files @@ -51,5 +50,5 @@ tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") ) -pathlib.Path(os.path.dirname(args.pulser_file)).mkdir(parents=True, exist_ok=True) +Path(args.pulser_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.pulser_file, {"idxs": ids.tolist(), "mask": mask.tolist()}) diff --git a/scripts/util/FileKey.py b/scripts/util/FileKey.py index 5c01f97..9f646cc 100644 --- a/scripts/util/FileKey.py +++ b/scripts/util/FileKey.py @@ -2,9 +2,9 @@ This module contains classes to convert between keys and files using the patterns defined in patterns.py """ -import os import re from collections import namedtuple +from pathlib import Path import snakemake as smk @@ -216,7 +216,7 @@ def per_grouper(files): pers = [] per_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.experiment}-{fk.period}" not in pers: pers.append(f"{fk.experiment}-{fk.period}") per_files.append([]) @@ -231,7 +231,7 @@ def run_grouper(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.experiment}-{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.experiment}-{fk.period}-{fk.run}") run_files.append([]) diff --git a/scripts/util/cal_grouping.py b/scripts/util/cal_grouping.py index aec1572..651c137 100644 --- a/scripts/util/cal_grouping.py +++ b/scripts/util/cal_grouping.py @@ -3,7 +3,7 @@ """ import json -import os +from pathlib import Path from .FileKey import ChannelProcKey, ProcessingFileKey from .patterns import ( @@ -16,7 +16,7 @@ class cal_grouping: def __init__(self, setup, input_file): - with open(input_file) as r: + with Path(input_file).open() as r: self.datasets = json.load(r) self.expand_runs() self.setup = setup @@ -43,18 +43,13 @@ def get_filelists(self, dataset, channel, tier, experiment="l200", datatype="cal for per in dataset: if dataset[per] == "all": files += [ - os.path.join( - filelist_path(self.setup), - f"all-{experiment}-{per}-*-{datatype}-{tier}.filelist", - ) + Path(filelist_path(self.setup)) + / f"all-{experiment}-{per}-*-{datatype}-{tier}.filelist" ] else: files += [ - os.path.join( - filelist_path(self.setup), - f"all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist", - ) - for run in dataset[per] + Path(filelist_path(self.setup)) + / "all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist" ] return files @@ -80,7 +75,7 @@ def get_par_files( channel = "{channel}" selected_par_files = [] for par_file in all_par_files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(par_file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) if 
( fk.datatype == datatype and fk.experiment == experiment @@ -128,7 +123,7 @@ def get_plt_files( channel = "{channel}" selected_par_files = [] for par_file in all_par_files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(par_file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) if ( fk.datatype == datatype and fk.experiment == experiment @@ -170,7 +165,7 @@ def get_log_file( datatype=datatype, name=name, ) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(par_files[0])) + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) if channel == "default": fk.channel = "{channel}" else: @@ -187,7 +182,7 @@ def get_timestamp(self, catalog, dataset, channel, tier, experiment="l200", data datatype=datatype, name=None, ) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(par_files[0])) + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) return fk.timestamp def get_wildcard_constraints(self, dataset, channel): diff --git a/scripts/util/catalog.py b/scripts/util/catalog.py index 1fb516b..390a7c1 100644 --- a/scripts/util/catalog.py +++ b/scripts/util/catalog.py @@ -43,7 +43,7 @@ def read_impl(sources): with file_name.open() as file: return yaml.safe_load(file) elif file_name.suffix == ".json": - with open(file_name) as file: + with file_name.open() as file: return json.load(file) else: msg = f"Can't run Props.read_from on file with suffix {file_name.suffix}" diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index 2fc3525..f347975 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -2,10 +2,10 @@ This module creates the validity files used for determining the time validity of data """ -import glob import json import re import warnings +from pathlib import Path import snakemake as smk import yaml @@ -40,13 +40,13 @@ def from_filekey(cls, filekey, name_dict): @staticmethod def write_to_jsonl(file_names, path): - with open(path, "w") as of: + with Path(path).open("w") as of: for file_name in file_names: of.write(f"{file_name.get_json()}\n") @staticmethod def write_to_yaml(file_names, path): - with open(path, "w") as of: + with Path(path).open("w") as of: yaml.dump([file_name.__dict__ for file_name in file_names], of, sort_keys=False) @staticmethod @@ -104,7 +104,7 @@ def get_keys(keypart, search_pattern): except AttributeError: tier_pattern_rx = re.compile(smk.io.regex(search_pattern)) fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] - files = glob.glob(fn_glob_pattern) + files = Path(fn_glob_pattern).glob() keys = [] for f in files: m = tier_pattern_rx.match(f) diff --git a/scripts/util/pars_loading.py b/scripts/util/pars_loading.py index 7a9dd87..a21f6ae 100644 --- a/scripts/util/pars_loading.py +++ b/scripts/util/pars_loading.py @@ -3,7 +3,7 @@ to determine the par and par overwrite for a particular timestamp """ -import os +from pathlib import Path from .catalog import Catalog from .FileKey import ProcessingFileKey @@ -29,19 +29,18 @@ def match_pars_files(filelist1, filelist2): @staticmethod def get_par_file(setup, timestamp, tier): - par_file = os.path.join(get_pars_path(setup, tier), "validity.yaml") + par_file = Path(get_pars_path(setup, tier)) / "validity.yaml" pars_files = pars_catalog.get_calib_files(par_file, timestamp) - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.yaml") + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" 
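# Illustrative sketch, not part of the patch: the calibration scripts and
# cal_grouping above repeatedly reduce a file name to its filekey and then use
# fk.timestamp as a dictionary key. A simplified stand-in for
# ChannelProcKey.get_filekey_from_pattern, assuming the usual
# {experiment}-{period}-{run}-{datatype}-{timestamp}-... naming; the regex and
# helper name below are hypothetical, not the project's own pattern.
import re
from pathlib import Path

_KEY_RX = re.compile(
    r"(?P<experiment>[^-]+)-(?P<period>p\d+)-(?P<run>r\d+)"
    r"-(?P<datatype>[^-]+)-(?P<timestamp>\d{8}T\d{6}Z)"
)

def timestamp_of(path):
    """Pull the timestamp out of a LEGEND-style file name."""
    m = _KEY_RX.match(Path(path).name)
    if m is None:
        raise ValueError(f"unrecognised file name: {path}")
    return m.group("timestamp")

# usage, mirroring the loops above:
#   object_dict[timestamp_of(ecal)] = pkl.load(...)
assert timestamp_of("l200-p03-r000-cal-20230101T123456Z-ch1027200-par_hit.pkl") == "20230101T123456Z"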
pars_files_overwrite = pars_catalog.get_calib_files(par_overwrite_file, timestamp) if len(pars_files_overwrite) > 0: pars_files, pars_files_overwrite = pars_catalog.match_pars_files( pars_files, pars_files_overwrite ) - pars_files = [os.path.join(get_pars_path(setup, tier), file) for file in pars_files] + pars_files = [Path(get_pars_path(setup, tier)) / file for file in pars_files] if len(pars_files_overwrite) > 0: pars_overwrite_files = [ - os.path.join(par_overwrite_path(setup), tier, file) - for file in pars_files_overwrite + Path(par_overwrite_path(setup)) / tier / file for file in pars_files_overwrite ] pars_files += pars_overwrite_files return pars_files diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 7f0b30c..cae1cd0 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -2,7 +2,7 @@ This module contains all the patterns needed for the data production """ -import os +from pathlib import Path from .utils import ( get_pars_path, @@ -56,61 +56,63 @@ def full_channel_pattern_with_extension(): def get_pattern_unsorted_data(setup): if sandbox_path(setup) is not None: - return os.path.join( - f"{sandbox_path(setup)}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca", + return ( + Path(f"{sandbox_path(setup)}") + / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" ) else: return None def get_pattern_tier_daq(setup): - return os.path.join( - f"{tier_daq_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca", + return ( + Path(f"{tier_daq_path(setup)}") + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" ) def get_pattern_tier_raw_blind(setup): - return os.path.join( - f"{tier_raw_blind_path(setup)}", - "phy", - "{period}", - "{run}", - "{experiment}-{period}-{run}-phy-{timestamp}-tier_raw.lh5", + return ( + Path(f"{tier_raw_blind_path(setup)}") + / "phy" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-phy-{timestamp}-tier_raw.lh5" ) def get_pattern_tier(setup, tier, check_in_cycle=True): if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: - file_pattern = os.path.join( - get_tier_path(setup, tier), - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5", + file_pattern = ( + Path(get_tier_path(setup, tier)) + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + + f"{tier}.lh5" ) elif tier in ["evt_concat", "pet_concat"]: - file_pattern = os.path.join( - get_tier_path(setup, tier[:3]), - "{datatype}", - "{experiment}-{period}-{run}-{datatype}-tier_" + f"{tier[:3]}.lh5", + file_pattern = ( + Path(get_tier_path(setup, tier[:3])) + / "{datatype}" + / "{experiment}-{period}-{run}-{datatype}-tier_" + + f"{tier[:3]}.lh5" ) elif tier == "skm": - file_pattern = os.path.join( - f"{tier_skm_path(setup)}", - "phy", - "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", + file_pattern = ( + Path(f"{tier_skm_path(setup)}") + / "phy" + / "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5" ) else: msg = "invalid tier" raise Exception(msg) - if tier_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: - return "/tmp/" + os.path.basename(file_pattern) + if tier_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: + return "/tmp/" + Path(file_pattern).name else: return file_pattern @@ -118,25 +120,27 @@ def get_pattern_tier(setup, 
tier, check_in_cycle=True): def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=True): if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: if name is not None: - return os.path.join( - get_pars_path(setup, tier), - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}_{name}.{extension}", + return ( + Path(get_pars_path(setup, tier)) + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}_{name}.{extension}" ) else: - file_pattern = os.path.join( - get_pars_path(setup, tier), - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}", + file_pattern = ( + Path(get_pars_path(setup, tier)) + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}.{extension}" ) else: msg = "invalid tier" raise Exception(msg) - if pars_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: + if pars_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: if name is None: return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{extension}" else: @@ -150,46 +154,48 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): if name is not None: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{ext}", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-" + + f"par_{tier}_{name}.{ext}" ) else: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{ext}", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-" + + f"par_{tier}.{ext}" ) def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): if name is not None: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - + f"{tier}_{name}-overwrite.{extension}", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + f"{tier}_{name}-overwrite.{extension}" ) else: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + tier - + f"-overwrite.{extension}", + + f"-overwrite.{extension}" ) @@ -197,90 +203,104 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" if datatype is None: datatype = "{datatype}" if name is None: - return os.path.join( - f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-" + return ( + Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" - + f"{tier}.{extension}", + + f"{tier}.{extension}" ) else: - return os.path.join( - f"{tmp_par_path(setup)}", - 
"{experiment}-{period}-{run}-" + return ( + Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" + datatype + "-{timestamp}" - + f"par_{tier}_{name}.{extension}", + + f"par_{tier}_{name}.{extension}" ) def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: - return os.path.join( - f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}.{extension}", + return ( + Path(f"{tmp_par_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + + f"{tier}.{extension}" ) else: - return os.path.join( - f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" - + f"{tier}_{name}.{extension}", + return ( + Path(f"{tmp_par_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + + f"{tier}_{name}.{extension}" ) def get_pattern_plts_tmp_channel(setup, tier, name=None): if name is None: - return os.path.join( - f"{tmp_plts_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + tier + ".pkl", + return ( + Path(f"{tmp_plts_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + + tier + + ".pkl" ) else: - return os.path.join( - f"{tmp_plts_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl", + return ( + Path(f"{tmp_plts_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + + f"{tier}_{name}.pkl" ) def get_pattern_plts(setup, tier, name=None): if name is None: - return os.path.join( - f"{plts_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + ".dir", + return ( + Path(f"{plts_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + + tier + + ".dir" ) else: - return os.path.join( - f"{plts_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + "_" + name + ".dir", + return ( + Path(f"{plts_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + + tier + + "_" + + name + + ".dir" ) def get_pattern_log(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log", + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-" + + processing_step + + ".log" ) def get_pattern_log_channel(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log", + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + + processing_step + + ".log" ) def get_pattern_log_concat(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / "{experiment}-{period}-{run}-{datatype}-" + + processing_step + + ".log" ) diff --git a/scripts/util/utils.py b/scripts/util/utils.py index 2cb53ef..fd433c7 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -189,7 +189,7 @@ def subst_vars_in_snakemake_config(workflow, config): config_filename = 
workflow.overwrite_configfiles[0] # ToDo: Better way of handling this? subst_vars( config, - var_values={"_": os.path.dirname(config_filename)}, + var_values={"_": Path(config_filename).parent}, use_env=True, ignore_missing=False, ) @@ -203,8 +203,8 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - base = os.path.basename(file) - file_name = os.path.splitext(base)[0] + base = Path(file).name + file_name = Path(base).name parts = file_name.split("-") run_no = parts[3] if run_no not in runs: diff --git a/tests/test_util.py b/tests/test_util.py index 707843b..010c749 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,5 +1,4 @@ import json -import os from pathlib import Path from scripts.util import ( @@ -20,7 +19,7 @@ testprod = Path(__file__).parent / "dummy_cycle" -with open(str(testprod / "config.json")) as r: +with testprod.open() as r: setup = json.load(r) subst_vars(setup, var_values={"_": str(testprod)}) setup = setup["setups"]["test"] @@ -107,12 +106,12 @@ def test_create_pars_keylist(): def test_pars_loading(): pars_files = CalibCatalog.get_calib_files( - os.path.join(par_dsp_path(setup), "validity.jsonl"), "20230101T123456Z" + Path(par_dsp_path(setup)) / "validity.jsonl", "20230101T123456Z" ) assert pars_files == ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"] par_override_files = CalibCatalog.get_calib_files( - os.path.join(par_overwrite_path(setup), "dsp", "validity.jsonl"), "20230101T123456Z" + Path(par_overwrite_path(setup)) / "dsp" / "validity.jsonl", "20230101T123456Z" ) pars_files, pars_files_overwrite = pars_catalog.match_pars_files( @@ -122,12 +121,12 @@ def test_pars_loading(): assert pars_files == ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"] assert set(pars_catalog.get_par_file(setup, "20230101T123456Z", "dsp")) == { - os.path.join( - par_dsp_path(setup), - "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", + ( + Path(par_dsp_path(setup)) + / "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", ), - os.path.join( - par_overwrite_path(setup), - "dsp/cal/p00/r000/l200-p00-r000-cal-T%-par_dsp_energy-overwrite.json", + ( + Path(par_overwrite_path(setup)) + / "dsp/cal/p00/r000/l200-p00-r000-cal-T%-par_dsp_energy-overwrite.json", ), } From 323dd0966c02bd9486c91bebde472ed965b13517 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 28 Nov 2024 19:04:37 +0100 Subject: [PATCH 010/101] debugging --- Snakefile | 92 +++++++++--------------- rules/blinding_calibration.smk | 10 +-- rules/blinding_check.smk | 10 +-- rules/chanlist_gen.smk | 8 +-- rules/common.smk | 50 +++++++------ rules/dsp.smk | 33 +++++---- rules/evt.smk | 11 +-- rules/filelist_gen.smk | 34 ++++++--- rules/hit.smk | 24 ++++--- rules/pht.smk | 35 +++++---- rules/pht_fast.smk | 6 +- rules/psp.smk | 41 +++++++---- rules/qc_phy.smk | 11 ++- rules/raw.smk | 1 - scripts/create_chankeylist.py | 7 +- scripts/util/FileKey.py | 8 +++ scripts/util/__init__.py | 16 ++--- scripts/util/cal_grouping.py | 38 +++++++--- scripts/util/catalog.py | 2 +- scripts/util/create_pars_keylist.py | 31 ++++---- scripts/util/pars_loading.py | 8 +-- scripts/util/patterns.py | 106 +++++++++++----------------- scripts/util/utils.py | 4 ++ 23 files changed, 311 insertions(+), 275 deletions(-) diff --git a/Snakefile b/Snakefile index b2daaa2..39a3dee 100644 --- a/Snakefile +++ b/Snakefile @@ -10,7 +10,7 @@ This includes: - the same for partition level tiers """ -import pathlib +from pathlib import Path import os import json import sys @@ -20,8 +20,8 @@ 
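# Illustrative sketch, not part of the patch: a quick reference for the pathlib
# idioms this migration leans on (plain Python behaviour; the file name below is
# made up). Note in particular that os.path.splitext(x)[0] corresponds to
# Path(x).stem, while Path(x).name keeps the extension.
import os
from pathlib import Path

f = Path("generated/par/hit/cal/p03/r000/l200-p03-r000-cal-20230101T123456Z-par_hit.yaml")

assert f.name == os.path.basename(f)          # basename, extension kept
assert f.stem.endswith("par_hit")             # os.path.splitext(basename)[0]
assert f.suffix == ".yaml"                    # used when picking a reader by extension
assert f.parent == Path(os.path.dirname(f))   # dirname
assert Path("a") / "b" / "c.yaml" == Path(os.path.join("a", "b", "c.yaml"))

# open() and mkdir() move onto the Path object itself:
#   pathlib.Path(os.path.dirname(out)).mkdir(...)  ->  Path(out).parent.mkdir(...)
#   with open(out, "w") as w:                      ->  with Path(out).open("w") as w: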
from collections import OrderedDict import logging import scripts.util as ds -from scripts.util.pars_loading import pars_catalog -from scripts.util.patterns import get_pattern_tier_raw +from scripts.util.pars_loading import ParsCatalog +from scripts.util.patterns import get_pattern_tier from scripts.util.utils import ( subst_vars_in_snakemake_config, runcmd, @@ -31,6 +31,7 @@ from scripts.util.utils import ( metadata_path, tmp_log_path, pars_path, + det_status_path, ) # Set with `snakemake --configfile=/path/to/your/config.json` @@ -43,8 +44,9 @@ setup = config["setups"]["l200"] configs = config_path(setup) chan_maps = chan_map_path(setup) meta = metadata_path(setup) +det_status = det_status_path(setup) swenv = runcmd(setup) -part = ds.cal_grouping(setup, os.path.join(configs, "partitions.json")) +part = ds.CalGrouping(setup, Path(det_status) / "cal_partitions.yaml") basedir = workflow.basedir @@ -72,32 +74,6 @@ include: "rules/blinding_calibration.smk" include: "rules/qc_phy.smk" -# Log parameter catalogs in validity.jsonl files -hit_par_cat_file = os.path.join(pars_path(setup), "hit", "validity.jsonl") -if os.path.isfile(hit_par_cat_file): - os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl")) -pathlib.Path(os.path.dirname(hit_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(hit_par_catalog, hit_par_cat_file) - -pht_par_cat_file = os.path.join(pars_path(setup), "pht", "validity.jsonl") -if os.path.isfile(pht_par_cat_file): - os.remove(os.path.join(pars_path(setup), "pht", "validity.jsonl")) -pathlib.Path(os.path.dirname(pht_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(pht_par_catalog, pht_par_cat_file) - -dsp_par_cat_file = os.path.join(pars_path(setup), "dsp", "validity.jsonl") -if os.path.isfile(dsp_par_cat_file): - os.remove(dsp_par_cat_file) -pathlib.Path(os.path.dirname(dsp_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(dsp_par_catalog, dsp_par_cat_file) - -psp_par_cat_file = os.path.join(pars_path(setup), "psp", "validity.jsonl") -if os.path.isfile(psp_par_cat_file): - os.remove(psp_par_cat_file) -pathlib.Path(os.path.dirname(psp_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(psp_par_catalog, psp_par_cat_file) - - localrules: gen_filelist, autogen_output, @@ -111,36 +87,36 @@ onstart: shell('{swenv} python3 -B -c "import ' + pkg + '"') # Log parameter catalogs in validity.jsonl files - hit_par_cat_file = os.path.join(pars_path(setup), "hit", "validity.jsonl") - if os.path.isfile(hit_par_cat_file): - os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl")) - pathlib.Path(os.path.dirname(hit_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(hit_par_catalog, hit_par_cat_file) - - pht_par_cat_file = os.path.join(pars_path(setup), "pht", "validity.jsonl") - if os.path.isfile(pht_par_cat_file): - os.remove(os.path.join(pars_path(setup), "pht", "validity.jsonl")) - pathlib.Path(os.path.dirname(pht_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(pht_par_catalog, pht_par_cat_file) - - dsp_par_cat_file = os.path.join(pars_path(setup), "dsp", "validity.jsonl") - if os.path.isfile(dsp_par_cat_file): - os.remove(dsp_par_cat_file) - pathlib.Path(os.path.dirname(dsp_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(dsp_par_catalog, dsp_par_cat_file) - - psp_par_cat_file = os.path.join(pars_path(setup), "psp", 
"validity.jsonl") - if os.path.isfile(psp_par_cat_file): - os.remove(psp_par_cat_file) - pathlib.Path(os.path.dirname(psp_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(psp_par_catalog, psp_par_cat_file) + hit_par_cat_file = Path(pars_path(setup)) / "hit" / "validity.yaml" + if hit_par_cat_file.is_file(): + hit_par_cat_file.unlink() + Path(hit_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(hit_par_catalog, hit_par_cat_file) + + pht_par_cat_file = Path(pars_path(setup)) / "pht" / "validity.yaml" + if pht_par_cat_file.is_file(): + pht_par_cat_file.unlink() + Path(pht_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(pht_par_catalog, pht_par_cat_file) + + dsp_par_cat_file = Path(pars_path(setup)) / "dsp" / "validity.yaml" + if dsp_par_cat_file.is_file(): + dsp_par_cat_file.unlink() + Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) + + psp_par_cat_file = Path(pars_path(setup)) / "psp" / "validity.yaml" + if psp_par_cat_file.is_file(): + psp_par_cat_file.unlink() + Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) onsuccess: from snakemake.report import auto_report rep_dir = f"{log_path(setup)}/report-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}" - pathlib.Path(rep_dir).mkdir(parents=True, exist_ok=True) + Path(rep_dir).mkdir(parents=True, exist_ok=True) # auto_report(workflow.persistence.dag, f"{rep_dir}/report.html") with open(os.path.join(rep_dir, "dag.txt"), "w") as f: @@ -190,12 +166,12 @@ rule gen_filelist: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_tier_raw(setup), - ignore_keys_file=os.path.join(configs, "ignore_keys.keylist"), - analysis_runs_file=os.path.join(configs, "analysis_runs.json"), + get_pattern_tier(setup, "raw", check_in_cycle=False), + ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", + analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - os.path.join(filelist_path(setup), "{label}-{tier}.filelist"), + Path(filelist_path(setup)) / "{label}-{tier}.filelist", run: if len(input) == 0: print( diff --git a/rules/blinding_calibration.smk b/rules/blinding_calibration.smk index bcf0d64..85ee2f6 100644 --- a/rules/blinding_calibration.smk +++ b/rules/blinding_calibration.smk @@ -11,6 +11,7 @@ from scripts.util.patterns import ( get_pattern_plts_tmp_channel, get_pattern_log_channel, ) +from pathlib import Path rule build_blinding_calibration: @@ -19,9 +20,8 @@ rule build_blinding_calibration: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), + files=Path(filelist_path(setup)) + / "all-{experiment}-{period}-{run}-cal-raw.filelist", params: timestamp="{timestamp}", datatype="cal", @@ -57,7 +57,7 @@ rule build_plts_blinding: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, name="blindcal", ), @@ -79,7 +79,7 @@ rule build_pars_blinding: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, name="blindcal", ), diff --git a/rules/blinding_check.smk 
b/rules/blinding_check.smk index ac7240c..eb3407d 100644 --- a/rules/blinding_check.smk +++ b/rules/blinding_check.smk @@ -12,6 +12,7 @@ from scripts.util.patterns import ( get_pattern_plts, get_pattern_pars, ) +from pathlib import Path rule build_blinding_check: @@ -20,9 +21,8 @@ rule build_blinding_check: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), + files=Path(filelist_path(setup)) + / "all-{experiment}-{period}-{run}-cal-raw.filelist", par_file=get_blinding_curve_file, params: timestamp="{timestamp}", @@ -59,7 +59,7 @@ rule build_plts_raw: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, ), output: @@ -80,7 +80,7 @@ rule build_pars_raw: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts( diff --git a/rules/chanlist_gen.smk b/rules/chanlist_gen.smk index 1dc4957..820d0fa 100644 --- a/rules/chanlist_gen.smk +++ b/rules/chanlist_gen.smk @@ -13,7 +13,7 @@ from scripts.util.utils import filelist_path, runcmd def get_par_chanlist( - setup, keypart, tier, basedir, configs, chan_maps, name=None, extension="json" + setup, keypart, tier, basedir, det_status, chan_maps, name=None, extension="yaml" ): tier_pattern = "((?P[^_]+)(\\_(?P[^_]+)(\\_(?P[^_]+)?)?)?)?" keypart_rx = re.compile(tier_pattern) @@ -28,7 +28,7 @@ def get_par_chanlist( f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --configs {configs}" + cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" os.system(cmd) @@ -42,7 +42,7 @@ def get_par_chanlist( return filenames -def get_plt_chanlist(setup, keypart, tier, basedir, configs, chan_maps, name=None): +def get_plt_chanlist(setup, keypart, tier, basedir, det_status, chan_maps, name=None): key = ChannelProcKey.parse_keypart(keypart) output_file = os.path.join( @@ -50,7 +50,7 @@ def get_plt_chanlist(setup, keypart, tier, basedir, configs, chan_maps, name=Non f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --configs {configs}" + cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" os.system(cmd) diff --git a/rules/common.smk b/rules/common.smk index b985044..6ba4654 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -2,16 +2,17 @@ Helper functions for running data production """ -import pathlib, os +from pathlib import Path from scripts.util.patterns import ( par_overwrite_path, - par_raw_path, + get_pars_path, get_pattern_unsorted_data, get_pattern_tier_daq, get_pattern_tier, get_pattern_plts_tmp_channel, ) from scripts.util import ProcessingFileKey +from scripts.util.catalog import Catalog from scripts.util import utils @@ -21,8 +22,8 @@ def ro(path): def 
get_blinding_curve_file(wildcards): """func to get the blinding calibration curves from the overrides""" - par_files = pars_catalog.get_calib_files( - Path(par_overwrite_path(setup)) / "raw" / "validity.jsonl", + par_files = Catalog.get_files( + Path(par_overwrite_path(setup)) / "raw" / "validity.yaml", wildcards.timestamp, ) if isinstance(par_files, str): @@ -36,13 +37,13 @@ def get_blinding_curve_file(wildcards): def get_blinding_check_file(wildcards): """func to get the right blinding check file""" - par_files = pars_catalog.get_calib_files( - Path(par_raw_path(setup)) / "validity.jsonl", wildcards.timestamp + par_files = Catalog.get_files( + Path(get_pars_path(setup, "raw")) / "validity.yaml", wildcards.timestamp ) if isinstance(par_files, str): - return str(Path(par_raw_path(setup)) / par_files) + return Path(get_pars_path(setup, "raw")) / par_files else: - return [str(Path(par_raw_path(setup)) / par_file) for par_file in par_files] + return [Path(get_pars_path(setup, "raw")) / par_file for par_file in par_files] def set_last_rule_name(workflow, new_name): @@ -70,35 +71,38 @@ def set_last_rule_name(workflow, new_name): workflow.check_localrules() -def get_svm_file(wildcards, tier, name): - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, wildcards.timestamp +def get_input_par_file(wildcards, tier, name): + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" + pars_files_overwrite = Catalog.get_files( + par_overwrite_file, + wildcards.timestamp, ) for pars_file in pars_files_overwrite: - if name in pars_file: - return os.path.join(par_overwrite_path(setup), tier, pars_file) + if name in str(pars_file): + return Path(par_overwrite_path(setup)) / tier / pars_file raise ValueError(f"Could not find model in {pars_files_overwrite}") def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" if timestamp is not None: - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, timestamp + pars_files_overwrite = Catalog.get_files( + par_overwrite_file, + timestamp, ) else: - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, wildcards.timestamp + pars_files_overwrite = Catalog.get_files( + par_overwrite_file, + wildcards.timestamp, ) if name is None: - fullname = f"{tier}-overwrite.json" + fullname = f"{tier}-overwrite.yaml" else: - fullname = f"{tier}_{name}-overwrite.json" + fullname = f"{tier}_{name}-overwrite.yaml" out_files = [] for pars_file in pars_files_overwrite: - if fullname in pars_file: - out_files.append(os.path.join(par_overwrite_path(setup), tier, pars_file)) + if fullname in str(pars_file): + out_files.append(Path(par_overwrite_path(setup)) / tier / pars_file) if len(out_files) == 0: raise ValueError(f"Could not find name in {pars_files_overwrite}") else: diff --git a/rules/dsp.smk b/rules/dsp.smk index f8ea4a3..3fa105c 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -6,9 +6,10 @@ Snakemake rules for processing dsp tier. 
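# Illustrative sketch, not part of the patch: get_input_par_file and
# get_overwrite_file above both resolve the overwrite files valid at a timestamp
# (via Catalog.get_files on validity.yaml) and then select entries by name. A
# stripped-down version of that selection step only; `resolved` stands in for the
# catalog lookup result and the example paths are made up.
from pathlib import Path

def select_overwrites(resolved, base_dir, tier, name=None):
    """Keep resolved files whose name matches par_<tier>[_<name>]-overwrite.yaml."""
    fullname = f"{tier}-overwrite.yaml" if name is None else f"{tier}_{name}-overwrite.yaml"
    hits = [Path(base_dir) / tier / f for f in resolved if fullname in str(f)]
    if not hits:
        raise ValueError(f"could not find {fullname} in {resolved}")
    return hits

resolved = ["cal/p03/r000/l200-p03-r000-cal-T%-par_dsp_energy-overwrite.yaml"]
assert select_overwrites(resolved, "overrides", "dsp", "energy")[0].name.endswith(
    "par_dsp_energy-overwrite.yaml"
)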
This is done in 4 steps: - running dsp over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import par_dsp_path +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.create_pars_keylist import ParsKeyResolve from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -18,16 +19,20 @@ from scripts.util.patterns import ( get_pattern_pars_tmp, get_pattern_log, get_pattern_pars, - get_pattern_pars_overwrite, - get_pattern_pars_svm, ) -dsp_par_catalog = pars_key_resolve.get_par_catalog( +dsp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_dsp"], "lar": ["par_dsp"]}, ) +dsp_par_cat_file = Path(pars_path(setup)) / "dsp" / "validity.yaml" +if dsp_par_cat_file.is_file(): + dsp_par_cat_file.unlink() +Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) + rule build_pars_dsp_tau: input: @@ -218,14 +223,16 @@ rule build_pars_dsp_eopt: rule build_svm_dsp: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( + wildcards, "dsp", "svm_hyperpars" + ), + train_data=lambda wildcards: get_input_par_file( wildcards, "dsp", "svm_hyperpars" ).replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_dsp_svm").replace("{datatype}", "cal"), + str(get_pattern_log(setup, "pars_dsp_svm")).replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -288,7 +295,7 @@ rule build_pars_dsp_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -344,7 +351,7 @@ rule build_pars_dsp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, name="dplms", extension="lh5", @@ -385,7 +392,7 @@ rule build_dsp: input: raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), pars_file=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "dsp" ) ), diff --git a/rules/evt.smk b/rules/evt.smk index c760b54..91f04dd 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -2,13 +2,8 @@ Snakemake rules for processing evt tier. 
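# Illustrative sketch, not part of the patch: build_svm_dsp (and its psp analogue
# further down) derive the training-data file from the hyperparameter file by a
# plain string substitution. Path.replace() is a filesystem rename, not a string
# substitution, which is why the rules go through str()/.as_posix() before
# calling .replace(); the path below is made up.
from pathlib import Path

hyperpars = Path("overrides/dsp/cal/p03/r000/l200-p03-r000-cal-T%-par_dsp_svm_hyperpars.json")

train = hyperpars.as_posix().replace("hyperpars.json", "train.lh5")   # plain str
assert train.endswith("par_dsp_svm_train.lh5")

# Path's own helpers only cover whole-name or suffix swaps:
assert hyperpars.with_suffix(".lh5").name == hyperpars.name.replace(".json", ".lh5")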
""" -from scripts.util.pars_loading import pars_catalog +from scripts.util.pars_loading import ParsCatalog from scripts.util.patterns import ( - get_pattern_tier_hit, - get_pattern_tier_dsp, - get_pattern_tier_tcm, - get_pattern_tier_pht, - get_pattern_tier_psp, get_pattern_tier, get_pattern_log, get_pattern_pars, @@ -31,10 +26,10 @@ for tier in ("evt", "pet"): else get_pattern_tier(setup, "pht", check_in_cycle=False) ), tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), - xtalk_matrix=lambda wildcards: get_svm_file( + xtalk_matrix=lambda wildcards: get_input_par_file( tier=tier, wildcards=wildcards, name="xtc" ), - par_files=lambda wildcards: pars_catalog.get_par_file( + par_files=lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "pht" ), output: diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 557d492..cb27661 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -1,6 +1,6 @@ import glob -import json -import os +import json, yaml +from pathlib import Path from scripts.util.FileKey import FileKey, run_grouper from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind @@ -9,9 +9,20 @@ from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): ignore_keys = [] if ignore_keys_file is not None: - if os.path.isfile(ignore_keys_file): - with open(ignore_keys_file) as f: - ignore_keys = f.read().splitlines() + if Path(ignore_keys_file).is_file(): + if Path(ignore_keys_file).suffix == ".json": + with Path(ignore_keys_file).open() as f: + ignore_keys = json.load(f) + elif Path(ignore_keys_file).suffix == ".keylist": + with Path(ignore_keys_file).open() as f: + ignore_keys = f.read().splitlines() + elif Path(ignore_keys_file).suffix in (".yaml", ".yml"): + with Path(ignore_keys_file).open() as f: + ignore_keys = yaml.safe_load(f) + else: + raise Warning( + "ignore_keys_file file not in json, yaml or keylist format" + ) ignore_keys = [ key.split("#")[0].strip() if "#" in key else key.strip() for key in ignore_keys @@ -23,9 +34,16 @@ def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): ignore_keys = [] if analysis_runs_file is not None: - if os.path.isfile(analysis_runs_file): - with open(analysis_runs_file) as f: - analysis_runs = json.load(f) + if Path(analysis_runs_file).is_file(): + if Path(ignore_keys_file).suffix == ".json": + with Path(analysis_runs_file).open() as f: + analysis_runs = json.load(f) + elif Path(ignore_keys_file).suffix in (".yaml", ".yml"): + with Path(analysis_runs_file).open() as f: + analysis_runs = yaml.safe_load(f) + else: + raise Warning("analysis_runs file not in json or yaml format") + analysis_runs = [] else: analysis_runs = [] print("no analysis_runs file found") diff --git a/rules/hit.smk b/rules/hit.smk index f1bb0ba..af1fcaf 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -6,7 +6,9 @@ Snakemake rules for processing hit tier. 
This is done in 4 steps: - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -19,12 +21,18 @@ from scripts.util.patterns import ( get_pattern_pars, ) -hit_par_catalog = ds.pars_key_resolve.get_par_catalog( +hit_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_hit"], "lar": ["par_hit"]}, ) +hit_par_cat_file = Path(pars_path(setup)) / "hit" / "validity.yaml" +if hit_par_cat_file.is_file(): + hit_par_cat_file.unlink() +Path(hit_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(hit_par_catalog, hit_par_cat_file) + # This rule builds the qc using the calibration dsp files and fft files rule build_qc: @@ -72,7 +80,7 @@ rule build_energy_calibration: ), pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ctc_dict=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "dsp" ) ), @@ -216,7 +224,7 @@ rule build_pars_hit_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -247,7 +255,7 @@ rule build_plts_hit: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, ), output: @@ -270,7 +278,7 @@ rule build_pars_hit: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts(setup, "hit"), @@ -297,7 +305,7 @@ rule build_pars_hit: rule build_hit: input: dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), - pars_file=lambda wildcards: pars_catalog.get_par_file( + pars_file=lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "hit" ), output: diff --git a/rules/pht.smk b/rules/pht.smk index 76542a3..dad1a24 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -6,9 +6,10 @@ Snakemake rules for processing pht (partition hit) tier data. 
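# Illustrative sketch, not part of the patch: dsp.smk and hit.smk above, and
# pht.smk/psp.smk further down, each refresh their tier's validity catalog with
# the same three steps (unlink a stale file, create the parent directory, write
# the new catalog). A generic helper showing that pattern; refresh_validity is
# hypothetical and only stands in for the ParsKeyResolve.write_to_yaml call.
from pathlib import Path

import yaml

def refresh_validity(pars_dir, entries):
    cat_file = Path(pars_dir) / "validity.yaml"
    if cat_file.is_file():                 # drop the catalog from a previous run
        cat_file.unlink()
    cat_file.parent.mkdir(parents=True, exist_ok=True)
    with cat_file.open("w") as f:          # stand-in for ParsKeyResolve.write_to_yaml
        yaml.dump(entries, f, sort_keys=False)
    return cat_file

# e.g. refresh_validity(Path(pars_path(setup)) / "hit", hit_par_catalog)
# using the names defined in the rules above.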
This is done in 4 - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -20,12 +21,18 @@ from scripts.util.patterns import ( get_pattern_pars, ) -pht_par_catalog = ds.pars_key_resolve.get_par_catalog( +pht_par_catalog = ds.ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_pht"], "lar": ["par_pht"]}, ) +pht_par_cat_file = Path(pars_path(setup)) / "pht" / "validity.yaml" +if pht_par_cat_file.is_file(): + pht_par_cat_file.unlink() +Path(pht_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(pht_par_catalog, pht_par_cat_file) + intier = "psp" @@ -50,7 +57,7 @@ for key, dataset in part.datasets.items(): cal_files=part.get_filelists(partition, key, intier), fft_files=part.get_filelists(partition, key, intier, datatype="fft"), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -207,7 +214,7 @@ rule build_per_energy_calibration: pht_dict=get_pattern_pars_tmp_channel(setup, "pht", "qc"), inplots=get_pattern_plts_tmp_channel(setup, "pht", "qc"), ctc_dict=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, intier ) ), @@ -258,7 +265,7 @@ for key, dataset in part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -440,7 +447,7 @@ for key, dataset in part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -620,7 +627,7 @@ for key, dataset in part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -793,7 +800,7 @@ rule build_pars_pht_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -822,7 +829,7 @@ rule build_plts_pht: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, ), output: @@ -843,7 +850,7 @@ rule build_pars_pht: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts(setup, "pht"), @@ -868,7 +875,7 @@ rule build_pars_pht: rule build_pht: input: dsp_file=get_pattern_tier(setup, intier, check_in_cycle=False), - pars_file=lambda wildcards: pars_catalog.get_par_file( + pars_file=lambda wildcards: 
ParsCatalog.get_par_file( setup, wildcards.timestamp, "pht" ), output: diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index 5672011..f83e534 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -1,6 +1,6 @@ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, diff --git a/rules/psp.smk b/rules/psp.smk index a959cf4..53e8f59 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -6,9 +6,10 @@ Snakemake rules for processing pht (partition hit) tier data. This is done in 4 - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import par_psp_path, par_dsp_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.utils import set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -20,12 +21,18 @@ from scripts.util.patterns import ( get_pattern_pars, ) -psp_par_catalog = pars_key_resolve.get_par_catalog( +psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_psp"], "lar": ["par_psp"]}, ) +psp_par_cat_file = Path(pars_path(setup)) / "psp" / "validity.yaml" +if psp_par_cat_file.is_file(): + psp_par_cat_file.unlink() +Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) + psp_rules = {} for key, dataset in part.datasets.items(): for partition in dataset.keys(): @@ -172,14 +179,18 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_svm_psp: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( + wildcards, "psp", "svm_hyperpars" + ), + train_data=lambda wildcards: get_input_par_file( wildcards, "psp", "svm_hyperpars" - ).replace("hyperpars.json", "train.lh5"), + ) + .as_posix() + .replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_psp_svm").replace("{datatype}", "cal"), + get_pattern_log(setup, "pars_psp_svm").as_posix().replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -221,7 +232,7 @@ rule build_pars_psp_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -250,7 +261,7 @@ rule build_plts_psp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, ), output: @@ -271,7 +282,7 @@ rule build_pars_psp_db: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, ), output: @@ -298,7 +309,7 @@ rule 
build_pars_psp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, name="dplms", extension="lh5", @@ -337,9 +348,9 @@ rule build_pars_psp: rule build_psp: input: - raw_file=get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), pars_file=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "psp" ) ), diff --git a/rules/qc_phy.smk b/rules/qc_phy.smk index 5b9cd6f..b89d8d3 100644 --- a/rules/qc_phy.smk +++ b/rules/qc_phy.smk @@ -1,11 +1,10 @@ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, @@ -138,7 +137,7 @@ rule build_plts_pht_phy: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="qcphy", ), @@ -160,7 +159,7 @@ rule build_pars_pht_phy: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="qcphy", ), diff --git a/rules/raw.smk b/rules/raw.smk index a81520a..8239519 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -1,6 +1,5 @@ from scripts.util.patterns import ( get_pattern_tier_daq, - get_pattern_tier_raw, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, diff --git a/scripts/create_chankeylist.py b/scripts/create_chankeylist.py index 6ed4510..f01c879 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -4,7 +4,7 @@ from legendmeta import LegendMetadata, TextDB argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--det_status", help="det_status", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channelmap", help="Channel Map", type=str, required=True) @@ -12,8 +12,8 @@ argparser.add_argument("--output_file", help="output_file", type=str, required=True) args = argparser.parse_args() -configs = TextDB(args.configs, lazy=True) -status_map = configs.on(args.timestamp, system=args.datatype)["analysis"] +det_status = TextDB(args.det_status, lazy=True) +status_map = det_status.statuses.on(args.timestamp, system=args.datatype) channel_map = LegendMetadata(args.channelmap, lazy=True) chmap = channel_map.channelmaps.on(args.timestamp) @@ -23,7 +23,6 @@ for chan in status_map if status_map[chan]["processable"] is True and chmap[chan].system == "geds" ] - Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) with Path(args.output_file).open("w") as f: for chan in channels: diff --git a/scripts/util/FileKey.py b/scripts/util/FileKey.py index 9f646cc..ca4573c 100644 --- a/scripts/util/FileKey.py +++ b/scripts/util/FileKey.py @@ 
-57,6 +57,8 @@ def get_filekey_from_pattern(cls, filename, pattern=None): except AttributeError: key_pattern_rx = re.compile(smk.io.regex(cls.key_pattern)) else: + if isinstance(pattern, Path): + pattern = pattern.as_posix() try: key_pattern_rx = re.compile(smk.io.regex_from_filepattern(pattern)) except AttributeError: @@ -92,6 +94,8 @@ def parse_keypart(cls, keypart): return cls(**d) def get_path_from_filekey(self, pattern, **kwargs): + if isinstance(pattern, Path): + pattern = pattern.as_posix() if kwargs is None: return smk.io.expand(pattern, **self._asdict()) else: @@ -163,6 +167,8 @@ def name(self): return f"{super().name}-{self.processing_step}" def get_path_from_filekey(self, pattern, **kwargs): + if isinstance(pattern, Path): + pattern = pattern.as_posix() if not isinstance(pattern, str): pattern = pattern(self.tier, self.identifier) if kwargs is None: @@ -198,6 +204,8 @@ def _asdict(self): @staticmethod def get_channel_files(keypart, par_pattern, chan_list): + if isinstance(par_pattern, Path): + par_pattern = par_pattern.as_posix() d = ChannelProcKey.parse_keypart(keypart) filenames = [] for chan in chan_list: diff --git a/scripts/util/__init__.py b/scripts/util/__init__.py index 90b7204..caa4dd2 100644 --- a/scripts/util/__init__.py +++ b/scripts/util/__init__.py @@ -1,8 +1,8 @@ -from .CalibCatalog import CalibCatalog, Props, PropsStream -from .create_pars_keylist import pars_key_resolve -from .dataset_cal import dataset_file +from .cal_grouping import CalGrouping +from .catalog import Catalog, Props, PropsStream +from .create_pars_keylist import ParsKeyResolve from .FileKey import ChannelProcKey, FileKey, ProcessingFileKey -from .pars_loading import pars_catalog +from .pars_loading import ParsCatalog from .utils import ( runcmd, subst_vars, @@ -14,13 +14,13 @@ __all__ = [ "Props", "PropsStream", - "CalibCatalog", - "pars_key_resolve", - "dataset_file", + "Catalog", + "ParsKeyResolve", + "CalGrouping", "FileKey", "ProcessingFileKey", "ChannelProcKey", - "pars_catalog", + "ParsCatalog", "unix_time", "runcmd", "subst_vars_impl", diff --git a/scripts/util/cal_grouping.py b/scripts/util/cal_grouping.py index 651c137..e41d5c7 100644 --- a/scripts/util/cal_grouping.py +++ b/scripts/util/cal_grouping.py @@ -5,19 +5,26 @@ import json from pathlib import Path +import yaml + from .FileKey import ChannelProcKey, ProcessingFileKey from .patterns import ( get_pattern_log_channel, + get_pattern_pars, get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, ) from .utils import filelist_path -class cal_grouping: +class CalGrouping: def __init__(self, setup, input_file): - with Path(input_file).open() as r: - self.datasets = json.load(r) + if Path(input_file).suffix == ".json": + with Path(input_file).open() as r: + self.datasets = json.load(r) + elif Path(input_file).suffix in (".yaml", ".yml"): + with Path(input_file).open() as r: + self.datasets = yaml.safe_load(r) self.expand_runs() self.setup = setup @@ -28,7 +35,7 @@ def expand_runs(self): if isinstance(runs, str) and ".." 
in runs: start, end = runs.split("..") self.datasets[channel][part][per] = [ - f"r{x:02}" for x in range(int(start[2:]), int(end) + 1) + f"r{x:03}" for x in range(int(start[1:]), int(end[1:]) + 1) ] def get_dataset(self, dataset, channel): @@ -49,7 +56,8 @@ def get_filelists(self, dataset, channel, tier, experiment="l200", datatype="cal else: files += [ Path(filelist_path(self.setup)) - / "all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist" + / f"all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist" + for run in dataset[per] ] return files @@ -62,14 +70,19 @@ def get_par_files( experiment="l200", datatype="cal", name=None, - extension="json", + extension="yaml", ): dataset = self.get_dataset(dataset, channel) all_par_files = [] for item in catalog: par_files = item.apply for par_file in par_files: - if par_file.split("-")[-1] == f"par_{tier}.json": + if ( + par_file.split("-")[-1] + == str(get_pattern_pars(self.setup, tier, check_in_cycle=False).name).split( + "-" + )[-1] + ): all_par_files.append(par_file) if channel == "default": channel = "{channel}" @@ -117,7 +130,12 @@ def get_plt_files( for item in catalog: par_files = item.apply for par_file in par_files: - if par_file.split("-")[-1] == f"par_{tier}.json": + if ( + par_file.split("-")[-1] + == str(get_pattern_pars(self.setup, tier, check_in_cycle=False).name).split( + "-" + )[-1] + ): all_par_files.append(par_file) if channel == "default": channel = "{channel}" @@ -201,6 +219,6 @@ def get_wildcard_constraints(self, dataset, channel): out_string = "" for channel in exclude_chans: out_string += f"(?!{channel})" - return out_string + r"ch\d{7}" + return out_string + r"^[VPCB]\d{1}\w{5}$" else: - return r"ch\d{7}" + return r"^[VPCB]\d{1}\w{5}$" diff --git a/scripts/util/catalog.py b/scripts/util/catalog.py index 390a7c1..9ec9b80 100644 --- a/scripts/util/catalog.py +++ b/scripts/util/catalog.py @@ -79,7 +79,7 @@ class PropsStream: @staticmethod def get(value): - if isinstance(value, str): + if isinstance(value, (str, Path)): return PropsStream.read_from(value) if isinstance(value, (collections.abc.Sequence, types.GeneratorType)): diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index f347975..c3e1f22 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -14,7 +14,7 @@ from .patterns import par_validity_pattern -class pars_key_resolve: +class ParsKeyResolve: def __init__(self, valid_from, category, apply): self.valid_from = valid_from @@ -70,7 +70,7 @@ def generate_par_keylist(keys): keys = sorted(keys, key=FileKey.get_unix_timestamp) keylist.append(keys[0]) for key in keys[1:]: - matched_key = pars_key_resolve.match_keys(keylist[-1], key) + matched_key = ParsKeyResolve.match_keys(keylist[-1], key) if matched_key not in keylist: keylist.append(matched_key) else: @@ -89,10 +89,10 @@ def match_entries(entry1, entry2): @staticmethod def match_all_entries(entrylist, name_dict): out_list = [] - out_list.append(pars_key_resolve.from_filekey(entrylist[0], name_dict)) + out_list.append(ParsKeyResolve.from_filekey(entrylist[0], name_dict)) for entry in entrylist[1:]: - new_entry = pars_key_resolve.from_filekey(entry, name_dict) - pars_key_resolve.match_entries(out_list[-1], new_entry) + new_entry = ParsKeyResolve.from_filekey(entry, name_dict) + ParsKeyResolve.match_entries(out_list[-1], new_entry) out_list.append(new_entry) return out_list @@ -100,14 +100,17 @@ def match_all_entries(entrylist, name_dict): def get_keys(keypart, search_pattern): d = 
FileKey.parse_keypart(keypart) try: - tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(search_pattern)) + tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(str(search_pattern))) + except AttributeError: - tier_pattern_rx = re.compile(smk.io.regex(search_pattern)) + tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] - files = Path(fn_glob_pattern).glob() + p = Path(fn_glob_pattern) + parts = p.parts[p.is_absolute() :] + files = Path(p.root).glob(str(Path(*parts))) keys = [] for f in files: - m = tier_pattern_rx.match(f) + m = tier_pattern_rx.match(str(f)) if m is not None: d = m.groupdict() key = FileKey(**d) @@ -118,19 +121,19 @@ def get_keys(keypart, search_pattern): def get_par_catalog(keypart, search_patterns, name_dict): if isinstance(keypart, str): keypart = [keypart] - if isinstance(search_patterns, str): + if isinstance(search_patterns, (str, Path)): search_patterns = [search_patterns] keylist = [] for search_pattern in search_patterns: for keypar in keypart: - keylist += pars_key_resolve.get_keys(keypar, search_pattern) + keylist += ParsKeyResolve.get_keys(keypar, search_pattern) if len(keylist) != 0: keys = sorted(keylist, key=FileKey.get_unix_timestamp) - keylist = pars_key_resolve.generate_par_keylist(keys) + keylist = ParsKeyResolve.generate_par_keylist(keys) - entrylist = pars_key_resolve.match_all_entries(keylist, name_dict) + entrylist = ParsKeyResolve.match_all_entries(keylist, name_dict) else: msg = "No Keys found" warnings.warn(msg, stacklevel=0) - entrylist = [pars_key_resolve("00000000T000000Z", "all", [])] + entrylist = [ParsKeyResolve("00000000T000000Z", "all", [])] return entrylist diff --git a/scripts/util/pars_loading.py b/scripts/util/pars_loading.py index a21f6ae..137ae03 100644 --- a/scripts/util/pars_loading.py +++ b/scripts/util/pars_loading.py @@ -12,7 +12,7 @@ from .utils import get_pars_path, par_overwrite_path -class pars_catalog(Catalog): +class ParsCatalog(Catalog): @staticmethod def match_pars_files(filelist1, filelist2): for file2 in filelist2: @@ -30,11 +30,11 @@ def match_pars_files(filelist1, filelist2): @staticmethod def get_par_file(setup, timestamp, tier): par_file = Path(get_pars_path(setup, tier)) / "validity.yaml" - pars_files = pars_catalog.get_calib_files(par_file, timestamp) + pars_files = ParsCatalog.get_files(par_file, timestamp) par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" - pars_files_overwrite = pars_catalog.get_calib_files(par_overwrite_file, timestamp) + pars_files_overwrite = ParsCatalog.get_files(par_overwrite_file, timestamp) if len(pars_files_overwrite) > 0: - pars_files, pars_files_overwrite = pars_catalog.match_pars_files( + pars_files, pars_files_overwrite = ParsCatalog.match_pars_files( pars_files, pars_files_overwrite ) pars_files = [Path(get_pars_path(setup, tier)) / file for file in pars_files] diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index cae1cd0..2418ead 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -14,7 +14,6 @@ tier_daq_path, tier_path, tier_raw_blind_path, - tier_skm_path, tmp_log_path, tmp_par_path, tmp_plts_path, @@ -91,28 +90,26 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" - + f"{tier}.lh5" + / ("{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5") ) elif tier in ["evt_concat", "pet_concat"]: 
file_pattern = ( Path(get_tier_path(setup, tier[:3])) / "{datatype}" - / "{experiment}-{period}-{run}-{datatype}-tier_" - + f"{tier[:3]}.lh5" + / ("{experiment}-{period}-{run}-{datatype}-tier_" + f"{tier[:3]}.lh5") ) elif tier == "skm": file_pattern = ( - Path(f"{tier_skm_path(setup)}") + Path(f"{get_tier_path(setup, tier)}") / "phy" / "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5" ) else: msg = "invalid tier" raise Exception(msg) - if tier_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: - return "/tmp/" + Path(file_pattern).name + if tier_path(setup) not in str(file_pattern.resolve(strict=False)) and check_in_cycle is True: + return "/tmp/" + file_pattern.name else: return file_pattern @@ -125,8 +122,10 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-par_" - + f"{tier}_{name}.{extension}" + / ( + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}_{name}.{extension}" + ) ) else: file_pattern = ( @@ -134,19 +133,21 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-par_" - + f"{tier}.{extension}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}") ) else: msg = "invalid tier" raise Exception(msg) - if pars_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: + if ( + pars_path(setup) not in str(Path(file_pattern).resolve(strict=False)) + and check_in_cycle is True + ): if name is None: return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{extension}" else: return ( "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" - + f"par_{tier}_{name}.{extension}" + f"par_{tier}_{name}.{extension}" ) else: return file_pattern @@ -160,8 +161,7 @@ def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-" - + f"par_{tier}_{name}.{ext}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{ext}") ) else: return ( @@ -170,8 +170,7 @@ def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-" - + f"par_{tier}.{ext}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{ext}") ) @@ -183,8 +182,10 @@ def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - f"{tier}_{name}-overwrite.{extension}" + / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + f"{tier}_{name}-overwrite.{extension}" + ) ) else: return ( @@ -193,9 +194,11 @@ def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - + tier - + f"-overwrite.{extension}" + / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + + tier + + f"-overwrite.{extension}" + ) ) @@ -203,15 +206,12 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" if datatype is None: datatype = "{datatype}" if name is None: - return ( - Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" - + datatype - + "-{timestamp}-par_" - + f"{tier}.{extension}" + return Path(f"{tmp_par_path(setup)}") / ( + 
"{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + f"{tier}.{extension}" ) else: - return ( - Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" + return Path(f"{tmp_par_path(setup)}") / ( + "{experiment}-{period}-{run}-" + datatype + "-{timestamp}" + f"par_{tier}_{name}.{extension}" @@ -220,32 +220,24 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: - return ( - Path(f"{tmp_par_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" - + f"{tier}.{extension}" + return Path(f"{tmp_par_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}.{extension}" ) else: - return ( - Path(f"{tmp_par_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + return Path(f"{tmp_par_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}_{name}.{extension}" ) def get_pattern_plts_tmp_channel(setup, tier, name=None): if name is None: - return ( - Path(f"{tmp_plts_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" - + tier - + ".pkl" + return Path(f"{tmp_plts_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + tier + ".pkl" ) else: - return ( - Path(f"{tmp_plts_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" - + f"{tier}_{name}.pkl" + return Path(f"{tmp_plts_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl" ) @@ -257,9 +249,7 @@ def get_pattern_plts(setup, tier, name=None): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" - + tier - + ".dir" + / ("{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + ".dir") ) else: return ( @@ -268,11 +258,7 @@ def get_pattern_plts(setup, tier, name=None): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" - + tier - + "_" - + name - + ".dir" + / ("{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + "_" + name + ".dir") ) @@ -280,9 +266,7 @@ def get_pattern_log(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-" - + processing_step - + ".log" + / ("{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log") ) @@ -290,9 +274,7 @@ def get_pattern_log_channel(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" - + processing_step - + ".log" + / ("{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log") ) @@ -300,7 +282,5 @@ def get_pattern_log_concat(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / "{experiment}-{period}-{run}-{datatype}-" - + processing_step - + ".log" + / ("{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log") ) diff --git a/scripts/util/utils.py b/scripts/util/utils.py index fd433c7..319eaa6 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -114,6 +114,10 @@ def chan_map_path(setup): return setup["paths"]["chan_map"] +def det_status_path(setup): + return setup["paths"]["detector_status"] + + def metadata_path(setup): return setup["paths"]["metadata"] From bbf65e90c9b4ead350b3761de17a473e9b2034fc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Nov 2024 
15:14:35 +0100 Subject: [PATCH 011/101] move info from readme to docs --- README.md | 112 ------------------------------------ docs/Makefile | 21 +++++++ docs/source/developer.rst | 15 +++++ docs/source/index.rst | 41 +++++++++++++ docs/source/user_manual.rst | 98 +++++++++++++++++++++++++++++++ 5 files changed, 175 insertions(+), 112 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/source/developer.rst create mode 100644 docs/source/index.rst create mode 100644 docs/source/user_manual.rst diff --git a/README.md b/README.md index 2459337..3565167 100644 --- a/README.md +++ b/README.md @@ -3,115 +3,3 @@ Implementation of an automatic data processing flow for L200 data, based on [Snakemake](https://snakemake.readthedocs.io/). - - -## Configuration - -Data processing resources are configured via a single site-dependent (and -possibly user-dependent) configuration file, named `config.json` in the -following. You may choose an arbitrary name, though. - -Use the included [templates/config.json](templates/config.json) as a template -and adjust the data base paths as necessary. Note that, when running Snakemake, -the default path to the config file is `./config.json`. - - -## Key-Lists - -Data generation is based on key-lists, which are flat text files -(extension ".keylist") containing one entry of the form -`{experiment}-{period}-{run}-{datatype}-{timestamp}` per line. - -Key-lists can be auto-generated based on the available DAQ files -using Snakemake targets of the form - -* `all-{experiment}.keylist` -* `all-{experiment}-{period}.keylist` -* `all-{experiment}-{period}-{run}.keylist` -* `all-{experiment}-{period}-{run}-{datatype}.keylist` - -which will generate the list of available file keys for all l200 files, resp. -a specific period, or a specific period and run, etc. - -For example: -```shell -$ snakemake all-l200-myper.keylist -``` -will generate a key-list with all files regarding period `myper`. - - -## File-Lists - -File-lists are flat files listing output files that should be generated, -with one file per line. A file-list will typically be generated for a given -data tier from a key-list, using the Snakemake targets of the form -`{label}-{tier}.filelist` (generated from `{label}.keylist`). - -For file lists based on auto-generated key-lists like -`all-{experiment}-{period}-{tier}.filelist`, the corresponding key-list -(`all-{experiment}-{period}.keylist` in this case) will be created -automatically, if it doesn't exist. - -Example: -```shell -$ snakemake all-mydet-mymeas-tier2.filelist -``` - -File-lists may of course also be derived from custom keylists, generated -manually or by other means, e.g. `my-dataset-raw.filelist` will be -generated from `my-dataset.keylist`. - - -## Main output generation - -Usually, the main output will be determined by a file-list, resp. a key-list -and data tier. The special output target `{label}-{tier}.gen` is used to -generate all files listed in `{label}-{tier}.filelist`. After the files -are created, the empty file `{label}-{tier}.filelist` will be created to -mark the successful data production. - -Snakemake targets like `all-{experiment}-{period}-{tier}.gen` may be used -to automatically generate key-lists and file-lists (if not already present) -and produce all possible output for the given data tier, based on available -tier0 files which match the target. 
- -Example: -```shell -$ snakemake all-mydet-mymeas-tier2.gen -``` -Targets like `my-dataset-raw.gen` (derived from a key-list -`my-dataset.keylist`) are of course allowed as well. - - -## Monitoring - -Snakemake supports monitoring by connecting to a -[panoptes](https://github.com/panoptes-organization/panoptes) server. - -Run (e.g.) -```shell -$ panoptes --port 5000 -``` -in the background to run a panoptes server instance, which comes with a -GUI that can be accessed with a web-brower on the specified port. - -Then use the Snakemake option `--wms-monitor` to instruct Snakemake to push -progress information to the panoptes server: -```shell -snakemake --wms-monitor http://127.0.0.1:5000 [...] -``` - -## Using software containers - -This dataflow doesn't use Snakemake's internal Singularity support, but -instead supports Singularity containers via -[`venv`](https://github.com/oschulz/singularity-venv) environments -for greater control. - -To use this, the path to `venv` and the name of the environment must be set -in `config.json`. - -This is only relevant then running Snakemake *outside* of the software -container, e.g. then using a batch system (see below). If Snakemake -and the whole workflow is run inside of a container instance, no -container-related settings in `config.json` are required. diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..9be493d --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,21 @@ +SHELL := /bin/bash +SOURCEDIR = source +BUILDDIR = build + +all: apidoc + sphinx-build -M html "$(SOURCEDIR)" "$(BUILDDIR)" -W --keep-going + +apidoc: clean-apidoc + sphinx-apidoc \ + --private \ + --module-first \ + --force \ + --output-dir "$(SOURCEDIR)/api" \ + ../scripts \ + ../rules + +clean-apidoc: + rm -rf "$(SOURCEDIR)/api" + +clean: clean-apidoc + rm -rf "$(BUILDDIR)" diff --git a/docs/source/developer.rst b/docs/source/developer.rst new file mode 100644 index 0000000..b6d7560 --- /dev/null +++ b/docs/source/developer.rst @@ -0,0 +1,15 @@ +Developers Guide +================ + +Snakemake is configured around a series of rules which specify how to generate a file or files from a set of input files. +These rules are defined in the ``Snakefile`` and in the files in the ``rules`` directory. +In general the structure is that a series of rules are defined to run on some calibration data, generating +a final ``par_{tier}.yaml`` file at the end which can be used by the ``tier`` rule to generate all the files in the tier. +For most rules there are two versions: the basic version and the partition version, where the first uses a single run +while the latter groups many runs together. +This grouping is defined in the ``cal_grouping.yaml`` file in the `legend-datasets `_ repository. +Each rule specifies its inputs and outputs along with how to generate the output, which can be +a shell command or a call to a Python function. These scripts are stored in the ``scripts`` directory. +Additional parameters can also be defined. +Full details can be found in the `Snakemake documentation <https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html>`_. diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..8534e71 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,41 @@ +Welcome to legend-dataflow's documentation! +=========================================== + +*legend-dataflow* is a Python package based on Snakemake ``_ +for running the data production of LEGEND. 
+It is designed to calibrate and optimise hundreds of channels in parallel before +bringing them all together to process the data. It takes as an input the metadata +at `legend metadata `_. + +Getting started +--------------- + +It is recommended to install and use the package through the `legend-prodenv `_. + +Next steps +---------- + +.. toctree:: + :maxdepth: 1 + + Package API reference + +.. toctree:: + :maxdepth: 1 + + tutorials + +.. toctree:: + :maxdepth: 1 + :caption: Related projects + + LEGEND Data Objects + Decoding Digitizer Data + Digital Signal Processing + Pygama + +.. toctree:: + :maxdepth: 1 + :caption: Development + + Source Code diff --git a/docs/source/user_manual.rst b/docs/source/user_manual.rst new file mode 100644 index 0000000..fb3e81b --- /dev/null +++ b/docs/source/user_manual.rst @@ -0,0 +1,98 @@ +Configuration +============= + +Data processing resources are configured via a single site-dependent (and +possibly user-dependent) configuration file, generally named ``config.json``, +although you can choose an arbitrary name. + +A template for this file is located at ``templates/config.json``, +which can be copied to the working directory +and the paths adjusted as necessary. Note that, when running Snakemake, +the default path to the config file is ``./config.json``. + +Profiles +======== + +A number of profiles are also included in the ``profiles`` directory. If none are specified, +the default profile is used. The profile can be specified by using the ``--profile`` option +when running Snakemake. These control how many jobs are run simultaneously, based on how many cores +are specified and the memory constraints of the system. A full list of all the options +that can be passed to Snakemake can be found at `snakemake `_. + + +Running the Dataflow +==================== + +To run the dataflow at the most basic level all that is necessary is to tell Snakemake which target file +to generate. In a simple case this may just be a single file, e.g. +```shell +$ snakemake /data2/public/prodenv/prod-blind/ref-v1.0.0/generated/tier/dsp/p03/r000/l200-p03-r000-cal-20230401T000000Z-tier_dsp.lh5 +``` +This would generate the file and all the files that are required to generate it. +In general though we want to generate a large number of files, and we can do this using the ``gen`` target. + +Main output generation +====================== + +Usually, the main output will be determined by a file-list. +The special output target ``{label}-{tier}.gen`` is used to +generate all files that follow the label up to the specified tier. +The label is composed of the following parts: +- the filelist designator: in most cases this will be ``all``, but other options are specified in the ``runlists.yaml`` file +in the `legend-datasets `_ repository. +- experiment: the experiment name i.e. l200 +- period: the period of the data e.g. p03 +- run: the run number e.g. r000 +- datatype: the data type e.g. cal +- timestamp: the timestamp of the data e.g. 20230401T000000Z + +Example: +```shell +$ snakemake all-l200-p03-r001-cal-20230401T000000Z-dsp.gen +``` + +You can specify as many or as few of these as you like, e.g. ``all-l200-p03-dsp.gen``. +If you want to specify a lower part of the label but leave a higher part free, +you can use the ``*`` character, e.g. ``all-l200-p03-*-cal-dsp.gen``. +Additionally, if you want to specify multiple options for a part of the label, you can use the ``_`` character between them, +e.g. ``all-l200-p03-r000_r001-dsp.gen``. 
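+ +As a minimal illustration (the period and run numbers here are only hypothetical instances of the label forms described above), such targets are invoked like any other ``gen`` target: +```shell +# all cal dsp files of period p03, any run (hypothetical example) +$ snakemake all-l200-p03-*-cal-dsp.gen +# dsp files for runs r000 and r001 of period p03 (hypothetical example) +$ snakemake all-l200-p03-r000_r001-dsp.gen +``` 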
+ +After the files +are created, the empty file ``{label}-{tier}.gen```` will be created to +mark the successful data production. + + +Monitoring +========== + +Snakemake supports monitoring by connecting to a +`panoptes `_ server. + +Run (e.g.) +```shell +$ panoptes --port 5000 +``` +in the background to run a panoptes server instance, which comes with a +GUI that can be accessed with a web-brower on the specified port. + +Then use the Snakemake option ``--wms-monitor`` to instruct Snakemake to push +progress information to the panoptes server: +```shell +snakemake --wms-monitor http://127.0.0.1:5000 [...] +``` + +Using software containers +========================= + +This dataflow doesn't use Snakemake's internal Singularity support, but +instead supports Singularity containers via +`venv `_ environments +for greater control. + +To use this, the path to ``venv`` and the name of the environment must be set +in ``config.json``. + +This is only relevant then running Snakemake *outside* of the software +container, e.g. when using a batch system (see below). If Snakemake +and the whole workflow is run inside of a container instance, no +container-related settings in ``config.json`` are required. From 9639200d37d4039bd74460d19665acedccdfc2c4 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 3 Dec 2024 22:46:01 +0100 Subject: [PATCH 012/101] add ability to specify different file selections and cleanup --- rules/filelist_gen.smk | 127 ++++++++++++++++++++++++++++------------- 1 file changed, 86 insertions(+), 41 deletions(-) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index cb27661..d0356a8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -5,9 +5,34 @@ from pathlib import Path from scripts.util.FileKey import FileKey, run_grouper from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind - -def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): +concat_datatypes = ["phy"] +concat_tiers = ["skm", "pet_concat", "evt_concat"] +blind_datatypes = ["phy"] + + +def expand_runs(in_dict): + """ + This function expands out the runs if a range is specified in the dictionary + e.g. + { + "p01": "r001..r005" + } + """ + for per, run_list in in_dict.items(): + if isinstance(run_list, str) and ".." 
in runs: + start, end = runs.split("..") + in_dict[per] = [f"r{x:03}" for x in range(int(start[1:]), int(end[1:]) + 1)] + return in_dict + + +def get_analysis_runs( + ignore_keys_file=None, analysis_runs_file=None, file_selection="all" +): + """ + This function reads in the ignore_keys and analysis_runs files and returns the dictionaries + """ ignore_keys = [] + analysis_runs = {} if ignore_keys_file is not None: if Path(ignore_keys_file).is_file(): if Path(ignore_keys_file).suffix == ".json": @@ -20,20 +45,18 @@ def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): with Path(ignore_keys_file).open() as f: ignore_keys = yaml.safe_load(f) else: - raise Warning( + raise ValueError( "ignore_keys_file file not in json, yaml or keylist format" ) - ignore_keys = [ + ignore_keys = [ # remove any comments in the keylist key.split("#")[0].strip() if "#" in key else key.strip() for key in ignore_keys ] else: - print("no ignore_keys.keylist file found") - ignore_keys = [] - else: - ignore_keys = [] + msg = f"no ignore_keys file found: {ignore_keys_file}" + raise ValueError(msg) - if analysis_runs_file is not None: + if analysis_runs_file is not None and file_selection != "all": if Path(analysis_runs_file).is_file(): if Path(ignore_keys_file).suffix == ".json": with Path(analysis_runs_file).open() as f: @@ -42,13 +65,18 @@ def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): with Path(analysis_runs_file).open() as f: analysis_runs = yaml.safe_load(f) else: - raise Warning("analysis_runs file not in json or yaml format") - analysis_runs = [] + msg = f"analysis_runs file not in json or yaml format: {analysis_runs_file}" + raise ValueError(msg) + if file_selection in analysis_runs: + analysis_runs = expand_runs( + analysis_runs[file_selection] + ) # select the file_selection and expand out the runs + else: + msg = f"Unknown file selection: {file_selection} not in {list(analysis_runs)}" + raise ValueError(msg) else: - analysis_runs = [] - print("no analysis_runs file found") - else: - analysis_runs = [] + msg = f"no analysis_runs file found: {analysis_runs_file}" + raise ValueError(msg) return analysis_runs, ignore_keys @@ -75,9 +103,14 @@ def get_keys(keypart): def get_pattern(setup, tier): + """ + Helper function to get the search pattern for the given tier, + some tiers such as skm need to refer to a different pattern when looking for files + as only phy files are taken to skm others are only taken to pet + """ if tier == "blind": fn_pattern = get_pattern_tier(setup, "raw", check_in_cycle=False) - elif tier == "skm" or tier == "pet_concat": + elif tier in ("skm", "pet_concat"): fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) elif tier == "evt_concat": fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) @@ -87,6 +120,9 @@ def get_pattern(setup, tier): def concat_phy_filenames(setup, phy_filenames, tier): + """ + This function concatenates the files from the same run together + """ fn_pattern = get_pattern(setup, tier) # group files by run sorted_phy_filenames = run_grouper(phy_filenames) @@ -110,18 +146,20 @@ def build_filelist( tier, ignore_keys=None, analysis_runs=None, - file_selection="all", ): + """ + This function builds the filelist for the given filekeys, search pattern and tier. 
+ It will ignore any keys in the ignore_keys list and only include the keys specified in the analysis_runs dict + """ fn_pattern = get_pattern(setup, tier) if ignore_keys is None: ignore_keys = [] if analysis_runs is None: - analysis_runs = [] + analysis_runs = {} phy_filenames = [] other_filenames = [] - for key in filekeys: fn_glob_pattern = key.get_path_from_filekey(search_pattern)[0] files = glob.glob(fn_glob_pattern) @@ -131,7 +169,7 @@ def build_filelist( if _key.name in ignore_keys: pass else: - if tier == "blind" and _key.datatype == "phy": + if tier == "blind" and _key.datatype in blind_datatypes: filename = FileKey.get_path_from_filekey( _key, get_pattern_tier_raw_blind(setup) ) @@ -142,32 +180,38 @@ def build_filelist( else: filename = FileKey.get_path_from_filekey(_key, fn_pattern) - if file_selection == "all": - if _key.datatype == "phy": + if analysis_runs == {}: + if ( + _key.datatype in concat_datatypes + ): # separate out phy files as some tiers these are concatenated phy_filenames += filename else: other_filenames += filename - elif file_selection == "sel": - if analysis_runs == "all" or ( - _key.period in analysis_runs + else: + if ( + _key.period + in analysis_runs # check if period in analysis_runs dicts and ( - _key.run in analysis_runs[_key.period] - or analysis_runs[_key.period] == "all" + _key.run + in analysis_runs[ + _key.period + ] # check if run in analysis_runs dicts + or analysis_runs[_key.period] + == "all" # or if runs is just specified as "all" ) ): - if _key.datatype == "phy": - phy_filenames += filename + if _key.datatype in concat_datatypes: + phy_filenames += filename # separate out phy files as some tiers these are concatenated else: other_filenames += filename - else: - msg = "unknown file selection" - raise ValueError(msg) phy_filenames = sorted(phy_filenames) other_filenames = sorted(other_filenames) - if tier == "skm" or tier == "pet_concat" or tier == "evt_concat": - phy_filenames = concat_phy_filenames(setup, phy_filenames, tier) + if tier in concat_tiers: + phy_filenames = concat_phy_filenames( + setup, phy_filenames, tier + ) # concat phy files return phy_filenames + other_filenames @@ -175,10 +219,11 @@ def build_filelist( def get_filelist( wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): - file_selection = wildcards.label[:3] - keypart = wildcards.label[3:] - - analysis_runs, ignore_keys = get_analysis_runs(ignore_keys_file, analysis_runs_file) + file_selection = wildcards.label.split("-", 1)[0] + keypart = f'-{wildcards.label.split("-", 1)[1]}' # remove the file selection from the keypart + analysis_runs, ignore_keys = get_analysis_runs( + ignore_keys_file, analysis_runs_file, file_selection + ) filekeys = get_keys(keypart) @@ -189,7 +234,6 @@ def get_filelist( wildcards.tier, ignore_keys, analysis_runs, - file_selection, ) @@ -204,7 +248,9 @@ def get_filelist_full_wildcards( ): keypart = f"-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-{wildcards.datatype}" - analysis_runs, ignore_keys = get_analysis_runs(ignore_keys_file, analysis_runs_file) + analysis_runs, ignore_keys = get_analysis_runs( + ignore_keys_file, analysis_runs_file, file_selection + ) filekeys = get_keys(keypart) return build_filelist( @@ -214,5 +260,4 @@ def get_filelist_full_wildcards( tier, ignore_keys, analysis_runs, - file_selection, ) From 0cb28b69de8f30acf0b21fc272b9515293b2cf97 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 3 Dec 2024 22:49:33 +0100 Subject: [PATCH 013/101] updates for new meta, switch to detector 
keying in configs --- Snakefile | 23 ++++---- rules/dsp.smk | 37 ++++++++++-- rules/hit.smk | 9 +++ rules/pht.smk | 7 +++ rules/pht_fast.smk | 2 + rules/psp.smk | 13 +++-- rules/tcm.smk | 1 + scripts/build_dsp.py | 18 +++++- scripts/merge_channels.py | 48 ++++++++++++--- scripts/pars_dsp_dplms.py | 21 ++++--- scripts/pars_dsp_eopt.py | 24 ++++---- scripts/pars_dsp_event_selection.py | 19 +++--- scripts/pars_dsp_nopt.py | 17 +++--- scripts/pars_dsp_tau.py | 13 ++++- scripts/pars_hit_aoe.py | 20 +++++-- scripts/pars_hit_ecal.py | 16 ++--- scripts/pars_hit_lq.py | 29 +++++---- scripts/pars_hit_qc.py | 91 +++++++++++++++++++++-------- scripts/pars_pht_aoecal.py | 13 +++-- scripts/pars_pht_fast.py | 14 +++-- scripts/pars_pht_lqcal.py | 14 +++-- scripts/pars_pht_partcal.py | 22 +++---- scripts/pars_pht_qc.py | 37 ++++++------ scripts/pars_pht_qc_phy.py | 19 +++--- scripts/pars_tcm_pulser.py | 9 ++- scripts/util/convert_np.py | 14 +++++ 26 files changed, 385 insertions(+), 165 deletions(-) create mode 100644 scripts/util/convert_np.py diff --git a/Snakefile b/Snakefile index 39a3dee..0838a8c 100644 --- a/Snakefile +++ b/Snakefile @@ -133,15 +133,15 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # remove filelists - files = glob.glob(os.path.join(filelist_path(setup), "*")) - for file in files: - if os.path.isfile(file): - os.remove(file) - if os.path.exists(filelist_path(setup)): - os.rmdir(filelist_path(setup)) - - # remove logs + # # remove filelists + # files = glob.glob(os.path.join(filelist_path(setup), "*")) + # for file in files: + # if os.path.isfile(file): + # os.remove(file) + # if os.path.exists(filelist_path(setup)): + # os.rmdir(filelist_path(setup)) + + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): @@ -171,11 +171,12 @@ rule gen_filelist: analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - Path(filelist_path(setup)) / "{label}-{tier}.filelist", + temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), run: if len(input) == 0: print( - "WARNING: No files found for the given pattern\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" + f"WARNING: No files found for the given pattern:{wildcards.label}", + "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen", ) with open(output[0], "w") as f: for fn in input: diff --git a/rules/dsp.smk b/rules/dsp.smk index 3fa105c..34f7422 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -58,13 +58,14 @@ rule build_pars_dsp_tau: "{basedir}/../scripts/pars_dsp_tau.py " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " "--plot_path {output.plots} " "--output_file {output.decay_const} " "--pulser_file {input.pulser} " - "--raw_files {input.files}" + "--raw_files {input.files} " rule build_pars_event_selection: @@ -93,6 +94,7 @@ rule build_pars_event_selection: "{basedir}/../scripts/pars_dsp_event_selection.py " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -132,6 +134,7 @@ rule build_pars_dsp_nopt: "--database {input.database} " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -175,6 +178,7 @@ rule 
build_pars_dsp_dplms: "--inplots {input.inplots} " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -210,6 +214,7 @@ rule build_pars_dsp_eopt: "{basedir}/../scripts/pars_dsp_eopt.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -226,9 +231,9 @@ rule build_svm_dsp: hyperpars=lambda wildcards: get_input_par_file( wildcards, "dsp", "svm_hyperpars" ), - train_data=lambda wildcards: get_input_par_file( - wildcards, "dsp", "svm_hyperpars" - ).replace("hyperpars.json", "train.lh5"), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "dsp", "svm_hyperpars") + ).replace("hyperpars.yaml", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: @@ -274,9 +279,12 @@ rule build_plts_dsp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, ), + params: + timestamp="{timestamp}", + datatype="cal", output: get_pattern_plts(setup, "dsp"), group: @@ -286,6 +294,7 @@ rule build_plts_dsp: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_pars_dsp_objects: @@ -300,6 +309,9 @@ rule build_pars_dsp_objects: name="objects", extension="pkl", ), + params: + timestamp="{timestamp}", + datatype="cal", output: get_pattern_pars( setup, @@ -315,6 +327,8 @@ rule build_pars_dsp_objects: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " rule build_pars_dsp_db: @@ -324,9 +338,12 @@ rule build_pars_dsp_db: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, ), + params: + timestamp="{timestamp}", + datatype="cal", output: temp( get_pattern_pars_tmp( @@ -342,6 +359,8 @@ rule build_pars_dsp_db: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " rule build_pars_dsp: @@ -369,6 +388,9 @@ rule build_pars_dsp: extension="dir", check_in_cycle=check_in_cycle, ), + params: + timestamp="{timestamp}", + datatype="cal", output: out_file=get_pattern_pars( setup, @@ -386,6 +408,8 @@ rule build_pars_dsp: "--in_db {input.in_db} " "--out_db {output.out_db} " "--input {input.in_files} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " rule build_dsp: @@ -415,6 +439,7 @@ rule build_dsp: "{basedir}/../scripts/build_dsp.py " "--log {log} " f"--configs {ro(configs)} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--input {params.ro_input[raw_file]} " diff --git a/rules/hit.smk b/rules/hit.smk index af1fcaf..bb42651 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -44,6 +44,7 @@ rule build_qc: filelist_path(setup), "all-{experiment}-{period}-{run}-fft-dsp.filelist" ), pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + overwrite_files=lambda wildcards: get_overwrite_file("hit", wildcards), params: timestamp="{timestamp}", datatype="cal", @@ -65,11 +66,13 @@ rule build_qc: "--timestamp {params.timestamp} " "--channel {params.channel} " "--configs {configs} " + "--metadata {meta} " "--plot_path {output.plot_file} " "--save_path {output.qc_file} " "--pulser_file 
{input.pulser} " "--cal_files {input.files} " "--fft_files {input.fft_files} " + "--overwrite_files {input.overwrite_files} " # This rule builds the energy calibration using the calibration dsp files @@ -158,6 +161,7 @@ rule build_aoe_calibration: "{basedir}/../scripts/pars_hit_aoe.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -204,6 +208,7 @@ rule build_lq_calibration: "{basedir}/../scripts/pars_hit_lq.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -246,6 +251,7 @@ rule build_pars_hit_objects: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input} " "--output {output} " + "--channelmap {meta} " rule build_plts_hit: @@ -269,6 +275,7 @@ rule build_plts_hit: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input} " "--output {output} " + "--channelmap {meta} " rule build_pars_hit: @@ -300,6 +307,7 @@ rule build_pars_hit: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input[infiles]} " "--output {output} " + "--channelmap {meta} " rule build_hit: @@ -326,6 +334,7 @@ rule build_hit: "{swenv} python3 -B " "{basedir}/../scripts/build_hit.py " f"--configs {ro(configs)} " + "--metadata {meta} " "--log {log} " "--tier {params.tier} " "--datatype {params.datatype} " diff --git a/rules/pht.smk b/rules/pht.smk index dad1a24..e638832 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -129,6 +129,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_qc.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -181,6 +182,7 @@ rule build_pht_qc: "{basedir}/../scripts/pars_pht_qc.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -536,6 +538,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_aoecal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -596,6 +599,7 @@ rule build_pht_aoe_calibrations: "{basedir}/../scripts/pars_pht_aoecal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -714,6 +718,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_lqcal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -769,6 +774,7 @@ rule build_pht_lq_calibration: "{basedir}/../scripts/pars_pht_lqcal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -896,6 +902,7 @@ rule build_pht: "{swenv} python3 -B " "{basedir}/../scripts/build_hit.py " f"--configs {ro(configs)} " + "--metadata {meta} " "--log {log} " "--tier {params.tier} " "--datatype {params.datatype} " diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index f83e534..9369b6b 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -108,6 +108,7 @@ for key, dataset in part.datasets.items(): f"{basedir}/../scripts/pars_pht_fast.py " "--log {log} " 
"--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -166,6 +167,7 @@ rule par_pht_fast: "{basedir}/../scripts/pars_pht_fast.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " diff --git a/rules/psp.smk b/rules/psp.smk index 53e8f59..260be19 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -182,11 +182,9 @@ rule build_svm_psp: hyperpars=lambda wildcards: get_input_par_file( wildcards, "psp", "svm_hyperpars" ), - train_data=lambda wildcards: get_input_par_file( - wildcards, "psp", "svm_hyperpars" - ) - .as_posix() - .replace("hyperpars.json", "train.lh5"), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "psp", "svm_hyperpars") + ).replace("hyperpars.yaml", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: @@ -252,6 +250,7 @@ rule build_pars_psp_objects: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_plts_psp: @@ -273,6 +272,7 @@ rule build_plts_psp: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_pars_psp_db: @@ -300,6 +300,7 @@ rule build_pars_psp_db: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_pars_psp: @@ -344,6 +345,7 @@ rule build_pars_psp: "--in_db {input.in_db} " "--out_db {output.out_db} " "--input {input.in_files} " + "--channelmap {meta} " rule build_psp: @@ -373,6 +375,7 @@ rule build_psp: "{basedir}/../scripts/build_dsp.py " "--log {log} " f"--configs {ro(configs)} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--input {params.ro_input[raw_file]} " diff --git a/rules/tcm.smk b/rules/tcm.smk index c1164bb..e3a3410 100644 --- a/rules/tcm.smk +++ b/rules/tcm.smk @@ -66,3 +66,4 @@ rule build_pulser_ids: "--channel {params.channel} " "--tcm_files {params.input} " "--pulser_file {output.pulser} " + "--metadata {meta} " diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 02bf6a1..902ac4b 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -7,7 +7,7 @@ import numpy as np from dspeed import build_dsp -from legendmeta import TextDB +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 @@ -27,11 +27,15 @@ def replace_list_with_array(dic): argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) -argparser.add_argument("--log", help="log file", type=str) argparser.add_argument("--input", help="input file", type=str) + argparser.add_argument("--output", help="output file", type=str) argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() @@ -41,14 +45,22 @@ def replace_list_with_array(dic): logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) 
logging.getLogger("lgdo").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) log = logging.getLogger(__name__) +meta = LegendMetadata(path=args.metadata) +chan_map = meta.channelmap(args.timestamp, system=args.datatype) + + configs = TextDB(args.configs, lazy=True) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_dsp"][ "inputs" ]["processing_chain"] -channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} +channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) + for chan, file in channel_dict.items() +} db_files = [ par_file for par_file in args.pars_file if pathlib.Path(par_file).suffix in (".json", ".yaml") ] diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index e8994be..5fb6d68 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -4,6 +4,7 @@ from pathlib import Path import numpy as np +from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo import lh5 from util.FileKey import ChannelProcKey @@ -37,6 +38,19 @@ def replace_path(d, old_path, new_path): type=str, required=False, ) +argparser.add_argument( + "--channelmap", + help="channelmap", + type=str, + required=False, + default=None, +) +argparser.add_argument( + "--timestamp", + help="timestamp", + type=str, + required=False, +) args = argparser.parse_args() # change to only have 1 output file for multiple inputs @@ -46,6 +60,12 @@ def replace_path(d, old_path, new_path): file_extension = Path(args.output).suffix +if args.channelmap is not None: + channel_map = LegendMetadata(args.channelmap, lazy=True) + chmap = channel_map.channelmap(args.timestamp) +else: + chmap = None + if file_extension == ".dat" or file_extension == ".dir": out_file = Path(args.output).with_suffix("") else: @@ -61,9 +81,12 @@ def replace_path(d, old_path, new_path): for channel in channel_files: if Path(channel).suffix == file_extension: channel_dict = Props.read_from(channel) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + + channel_name = fkey.channel out_dict[channel_name] = channel_dict else: msg = "Output file extension does not match input file extension" @@ -79,7 +102,11 @@ def replace_path(d, old_path, new_path): with Path(channel).open("rb") as r: channel_dict = pkl.load(r) fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + + channel_name = fkey.channel out_dict[channel_name] = channel_dict with Path(temp_output).open("wb") as w: @@ -89,12 +116,16 @@ def replace_path(d, old_path, new_path): elif file_extension == ".dat" or file_extension == ".dir": common_dict = {} - with shelve.open(out_file, "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: + with shelve.open(str(out_file), "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: for channel in channel_files: with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel_files[0]).name) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + + channel_name = fkey.channel if isinstance(channel_dict, dict) and "common" in list(channel_dict): chan_common_dict = 
channel_dict.pop("common") common_dict[channel_name] = chan_common_dict @@ -109,8 +140,11 @@ def replace_path(d, old_path, new_path): for channel in channel_files: if Path(channel).suffix == file_extension: fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + channel_name = fkey.channel tb_in = lh5.read(f"{channel_name}", channel) lh5.write( diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 607613c..87403b8 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -15,10 +15,11 @@ argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) +argparser.add_argument("--database", help="database", type=str, required=True) argparser.add_argument("--log", help="log_file", type=str) -argparser.add_argument("--database", help="database", type=str, required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -42,6 +43,10 @@ log = logging.getLogger(__name__) sto = lh5.LH5Store() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(args.configs, lazy=True).on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["proc_chain"][args.channel] @@ -56,11 +61,9 @@ t0 = time.time() log.info("\nLoad fft data") - energies = sto.read(f"{args.channel}/raw/daqenergy", fft_files)[0] + energies = sto.read(f"{channel}/raw/daqenergy", fft_files)[0] idxs = np.where(energies.nda == 0)[0] - raw_fft = sto.read( - f"{args.channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs - )[0] + raw_fft = sto.read(f"{channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs)[0] t1 = time.time() log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") @@ -69,12 +72,12 @@ kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{args.channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda ids = np.isin(peaks, peaks_rounded) peaks = peaks[ids] idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - raw_cal = sto.read(f"{args.channel}/raw", args.peak_file, idx=ids)[0] + raw_cal = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}") if isinstance(dsp_config, (str, list)): @@ -107,7 +110,7 @@ dplms_pars = Table(col_dict={"coefficients": Array(coeffs)}) out_dict["dplms"][ "coefficients" - ] = f"loadlh5('{args.lh5_path}', '{args.channel}/dplms/coefficients')" + ] = f"loadlh5('{args.lh5_path}', '{channel}/dplms/coefficients')" log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") else: @@ -124,7 +127,7 @@ Path(args.lh5_path).parent.mkdir(parents=True, exist_ok=True) sto.write( 
Table(col_dict={"dplms": dplms_pars}), - name=args.channel, + name=channel, lh5_file=args.lh5_path, wo_mode="overwrite", ) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index bcda090..d4f0098 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -26,12 +26,12 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) - argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) -argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) argparser.add_argument("--log", help="log_file", type=str) +argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -58,6 +58,10 @@ sto = lh5.LH5Store() t0 = time.time() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_eopt"]["inputs"]["processing_chain"][ @@ -108,12 +112,12 @@ ) peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{args.channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda ids = np.isin(peaks, peaks_rounded) peaks = peaks[ids] idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - tb_data = sto.read(f"{args.channel}/raw", args.peak_file, idx=ids)[0] + tb_data = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] t1 = time.time() log.info(f"Data Loaded in {(t1-t0)/60} minutes") @@ -318,32 +322,32 @@ out_alpha_dict = {} out_alpha_dict["cuspEmax_ctc"] = { "expression": "cuspEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_cusp.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, } out_alpha_dict["cuspEftp_ctc"] = { "expression": "cuspEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_cusp.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, } out_alpha_dict["zacEmax_ctc"] = { "expression": "zacEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_zac.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, } out_alpha_dict["zacEftp_ctc"] = { "expression": "zacEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_zac.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, } out_alpha_dict["trapEmax_ctc"] = { "expression": "trapEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_trap.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, } out_alpha_dict["trapEftp_ctc"] = { "expression": "trapEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_trap.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, } if "ctc_params" in db_dict: db_dict["ctc_params"].update(out_alpha_dict) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 
2e6505b..f4dfd7d 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -83,10 +83,11 @@ def get_out_data( argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--raw_cal", help="raw_cal", type=str, nargs="*", required=True) argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -108,6 +109,10 @@ def get_out_data( sto = lh5.LH5Store() t0 = time.time() + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"][ @@ -121,11 +126,11 @@ def get_out_data( db_dict = Props.read_from(args.decay_const) Path(args.peak_file).parent.mkdir(parents=True, exist_ok=True) + rng = np.random.default_rng() + rand_num = f"{rng.integers(0,99999):05d}" + temp_output = f"{args.peak_file}.{rand_num}" if peak_dict.pop("run_selection") is True: log.debug("Starting peak selection") - rng = np.random.default_rng() - rand_num = f"{rng.integers(0,99999):05d}" - temp_output = f"{args.peak_file}.{rand_num}" with Path(args.raw_filelist).open() as f: files = f.read().splitlines() @@ -141,13 +146,13 @@ def get_out_data( tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, peak_dict["pulser_multiplicity_threshold"] + tcm_files, channel, peak_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - raw_dict = Props.read_from(args.raw_cal)[args.channel]["pars"]["operations"] + raw_dict = Props.read_from(args.raw_cal)[channel]["pars"]["operations"] peaks_kev = peak_dict["peaks"] kev_widths = peak_dict["kev_widths"] @@ -156,7 +161,7 @@ def get_out_data( final_cut_field = peak_dict["final_cut_field"] energy_parameter = peak_dict.get("energy_parameter", "trapTmax") - lh5_path = f"{args.channel}/raw" + lh5_path = f"{channel}/raw" if not isinstance(kev_widths, list): kev_widths = [kev_widths] diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 47261d2..5de3a59 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -20,6 +20,7 @@ argparser.add_argument("--inplots", help="inplots", type=str) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -44,6 +45,10 @@ t0 = time.time() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = 
configs["snakemake_rules"]["pars_dsp_nopt"]["inputs"]["processing_chain"][ @@ -61,9 +66,9 @@ raw_files = sorted(files) - energies = sto.read(f"{args.channel}/raw/daqenergy", raw_files)[0] + energies = sto.read(f"{channel}/raw/daqenergy", raw_files)[0] idxs = np.where(energies.nda == 0)[0] - tb_data = sto.read(f"{args.channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs)[0] + tb_data = sto.read(f"{channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs)[0] t1 = time.time() log.info(f"Time to open raw files {t1-t0:.2f} s, n. baselines {len(tb_data)}") @@ -72,7 +77,7 @@ cut_dict = generate_cuts(dsp_data, cut_dict=opt_dict.pop("cut_pars")) cut_idxs = get_cut_indexes(dsp_data, cut_dict) tb_data = sto.read( - f"{args.channel}/raw", raw_files, n_rows=opt_dict.pop("n_events"), idx=idxs[cut_idxs] + f"{channel}/raw", raw_files, n_rows=opt_dict.pop("n_events"), idx=idxs[cut_idxs] )[0] log.info(f"... {len(tb_data)} baselines after cuts") @@ -81,12 +86,10 @@ if args.plot_path: out_dict, plot_dict = pno.noise_optimization( - tb_data, dsp_config, db_dict.copy(), opt_dict, args.channel, display=1 + tb_data, dsp_config, db_dict.copy(), opt_dict, channel, display=1 ) else: - out_dict = pno.noise_optimization( - raw_files, dsp_config, db_dict.copy(), opt_dict, args.channel - ) + out_dict = pno.noise_optimization(raw_files, dsp_config, db_dict.copy(), opt_dict, channel) t2 = time.time() log.info(f"Optimiser finished in {(t2-t0)/60} minutes") diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index 82cec2d..b584648 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -13,10 +13,13 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) + argparser.add_argument("--plot_path", help="plot path", type=str, required=False) argparser.add_argument("--output_file", help="output file", type=str, required=True) @@ -37,6 +40,10 @@ sto = lh5.LH5Store() log = logging.getLogger(__name__) +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) config_dict = configs.on(args.timestamp, system=args.datatype) channel_dict = config_dict["snakemake_rules"]["pars_dsp_tau"]["inputs"]["processing_chain"][ @@ -66,14 +73,14 @@ tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" raise ValueError(msg) data = sto.read( - f"{args.channel}/raw", input_file, field_mask=["daqenergy", "timestamp", "t_sat_lo"] + f"{channel}/raw", input_file, field_mask=["daqenergy", "timestamp", "t_sat_lo"] )[0].view_as("pd") threshold = kwarg_dict.pop("threshold") @@ -89,7 +96,7 @@ cuts = np.where((data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering))[0] tb_data = sto.read( - f"{args.channel}/raw", + f"{channel}/raw", input_file, idx=cuts, 
n_rows=kwarg_dict.pop("n_events"), diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index a393868..c30c7ef 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -15,6 +15,7 @@ from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -103,17 +104,20 @@ def aoe_calibration( argparser.add_argument("files", help="files", nargs="*", type=str) argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) + argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--aoe_results", help="aoe_results", type=str) @@ -129,6 +133,10 @@ def aoe_calibration( logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_hit_aoecal" @@ -194,7 +202,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -213,7 +221,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") ) else: msg = "No pulser file or tcm filelist provided" @@ -231,6 +239,7 @@ def eres_func(x): sigma_func=sigma_func, **kwarg_dict, ) + obj.pdf = obj.pdf.name # need to change eres func as can't pickle lambdas try: @@ -266,6 +275,9 @@ def eres_func(x): "pars": {"operations": cal_dict}, "results": results_dict, } + +final_hit_dict = convert_dict_np_to_float(final_hit_dict) + Props.write_to(args.hit_pars, final_hit_dict) Path(args.aoe_results).parent.mkdir(parents=True, exist_ok=True) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index b310500..c94041d 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -22,6 +22,7 @@ from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from 
scipy.stats import binned_statistic +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) mpl.use("agg") @@ -452,8 +453,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) + channel = f"ch{chmap[args.channel].daq.rawid:07}" - det_status = chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["usability"] + det_status = chmap[args.channel]["analysis"]["usability"] if args.in_hit_dict: hit_dict = Props.read_from(args.in_hit_dict) @@ -466,7 +468,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): database_dic = Props.read_from(db_files) - hit_dict.update(database_dic[args.channel]["ctc_params"]) + hit_dict.update(database_dic[channel]["ctc_params"]) # get metadata dictionary configs = LegendMetadata(path=args.configs) @@ -497,7 +499,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", hit_dict, params=[*kwarg_dict["energy_params"], kwarg_dict["cut_param"], "timestamp", "trapTmax"], threshold=kwarg_dict["threshold"], @@ -515,7 +517,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -698,14 +700,14 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): if "monitoring_parameters" in kwarg_dict: monitor_dict = monitor_parameters( - files, f"{args.channel}/dsp", kwarg_dict["monitoring_parameters"] + files, f"{channel}/dsp", kwarg_dict["monitoring_parameters"] ) results_dict.update({"monitoring_parameters": monitor_dict}) # get baseline plots and save all plots to file if args.plot_path: common_dict = baseline_tracking_plots( - sorted(files), f"{args.channel}/dsp", plot_options=bl_plots + sorted(files), f"{channel}/dsp", plot_options=bl_plots ) for plot in list(common_dict): @@ -739,7 +741,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): pkl.dump(total_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) # save output dictionary - output_dict = {"pars": hit_dict, "results": {"ecal": results_dict}} + output_dict = convert_dict_np_to_float({"pars": hit_dict, "results": {"ecal": results_dict}}) Props.write_to(args.save_path, output_dict) # save calibration objects diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 579b34a..169b560 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -11,10 +11,12 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.math.distributions import gaussian +from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -128,12 +130,13 @@ def lq_calibration( argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) argparser.add_argument("--configs", help="configs", type=str, 
required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--lq_results", help="lq_results", type=str) @@ -148,6 +151,10 @@ def lq_calibration( logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_hit_lqcal" @@ -197,7 +204,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -216,7 +223,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") ) else: msg = "No pulser file or tcm filelist provided" @@ -262,19 +269,19 @@ def eres_func(x): pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) -results_dict = dict(**eres_dict, lq=out_dict) +final_hit_dict = convert_dict_np_to_float( + { + "pars": {"operations": cal_dict}, + "results": dict(**eres_dict, lq=out_dict), + } +) Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) -final_hit_dict = { - "pars": {"operations": cal_dict}, - "results": results_dict, -} Props.write_to(args.hit_pars, final_hit_dict) -Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) final_object_dict = dict( **object_dict, lq=obj, ) -Props.write_to(args.lq_results, final_object_dict) +Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) with Path(args.lq_results).open("wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 5311c46..320fee9 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -18,6 +18,7 @@ get_tcm_pulser_ids, ) from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) @@ -28,17 +29,26 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) + argparser.add_argument( + "--overwrite_files", + help="overwrite_files", + type=str, + required=False, + nargs="*", + ) argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + 
argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument("--tier", help="tier", type=str, default="hit") - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) argparser.add_argument("--save_path", help="save_path", type=str) args = argparser.parse_args() @@ -51,6 +61,10 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] @@ -58,19 +72,37 @@ kwarg_dict = Props.read_from(channel_dict) + if args.overwrite_files: + overwrite = Props.read_from(args.overwrite_files) + if channel in overwrite: + overwrite = overwrite[channel]["pars"]["operations"] + else: + overwrite = None + else: + overwrite = None + + if len(args.fft_files) == 1 and Path(args.fft_files[0]).suffix == ".filelist": + with Path(args.fft_files[0]).open() as f: + fft_files = f.read().splitlines() + else: + fft_files = args.fft_files + + if len(args.cal_files) == 1 and Path(args.cal_files[0]).suffix == ".filelist": + with Path(args.cal_files[0]).open() as f: + cal_files = f.read().splitlines() + else: + cal_files = args.fft_files + kwarg_dict_fft = kwarg_dict["fft_fields"] - if len(args.fft_files) > 0: + if len(fft_files) > 0: fft_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.fft_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(fft_files[0], f"{channel}/dsp/")], kwarg_dict_fft["cut_parameters"], ) fft_data = load_data( - args.fft_files, - f"{args.channel}/dsp", + fft_files, + f"{channel}/dsp", {}, [*fft_fields, "timestamp", "trapTmax"], ) @@ -123,31 +155,31 @@ hit_dict_fft = {} plot_dict_fft = {} + if overwrite is not None: + for name in kwarg_dict_fft["cut_parameters"]: + for cut_name, cut_dict in overwrite.items(): + if name in cut_name: + hit_dict_fft.update({cut_name: cut_dict}) + kwarg_dict_cal = kwarg_dict["cal_fields"] cut_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], kwarg_dict_cal["cut_parameters"], ) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] cut_fields += get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], init_cal["cut_parameters"], ) # load data in data, threshold_mask = load_data( - args.cal_files, - f"{args.channel}/dsp", + cal_files, + f"{channel}/dsp", {}, - [*cut_fields, "timestamp", "trapTmax"], + [*cut_fields, "timestamp", "trapTmax", "t_sat_lo"], threshold=kwarg_dict_cal.get("threshold", 0), return_selection_mask=True, cal_energy_param="trapTmax", @@ -163,7 +195,7 @@ tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, 
kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -201,16 +233,19 @@ for key in info.get("parameters", None): exp = re.sub(f"(? 500: + if len(data.query("is_pulser & ~is_recovering")) < 500: data = data.query("is_pulser & ~is_recovering") else: data = data.query("~is_pulser & ~is_recovering")[mask] @@ -222,9 +257,17 @@ display=1 if args.plot_path else 0, ) + if overwrite is not None: + for name in kwarg_dict_cal["cut_parameters"]: + for cut_name, cut_dict in overwrite.items(): + if name in cut_name: + hit_dict_cal.update({cut_name: cut_dict}) + hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal} plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal} + hit_dict = convert_dict_np_to_float(hit_dict) + Path(args.save_path).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.save_path, hit_dict) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index e9573e3..ca938e5 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -255,12 +255,13 @@ def eres_func(x): argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--aoe_results", help="aoe_results", nargs="*", type=str) @@ -276,6 +277,10 @@ def eres_func(x): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_pht_aoecal" @@ -350,7 +355,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -372,7 +377,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 4064b3c..104ad05 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -54,13 +54,13 @@ def run_splitter(files): argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) 
argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) @@ -77,6 +77,10 @@ def run_splitter(files): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + cal_dict = {} results_dicts = {} for ecal in args.ecal_file: @@ -167,7 +171,7 @@ def run_splitter(files): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict["threshold"], @@ -191,7 +195,7 @@ def run_splitter(files): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -213,7 +217,7 @@ def run_splitter(files): object_dict, inplots_dict, args.timestamp, - args.metadata, + chmap, args.configs, args.channel, args.datatype, diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 2ba88af..2c67745 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -13,6 +13,7 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.math.distributions import gaussian +from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal @@ -251,12 +252,13 @@ def eres_func(x): argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--lq_results", help="lq_results", nargs="*", type=str) @@ -272,6 +274,10 @@ def eres_func(x): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_pht_lqcal" 
@@ -337,7 +343,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -360,7 +366,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index a6eab18..a2d74e4 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -142,18 +142,14 @@ def calibrate_partition( object_dicts, plot_dicts, timestamp, - metadata_path, + chmap, configs, channel, datatype, gen_plots=True, ): - # load metadata - meta = LegendMetadata(path=metadata_path) - chmap = meta.channelmap(timestamp) - - det_status = chmap.map("daq.rawid")[int(channel[2:])]["analysis"]["usability"] + det_status = chmap[channel]["analysis"]["usability"] configs = LegendMetadata(path=configs) channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"]["pars_pht_partcal"][ @@ -418,13 +414,13 @@ def calibrate_partition( argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) @@ -441,6 +437,10 @@ def calibrate_partition( logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + cal_dict = {} results_dicts = {} for ecal in args.ecal_file: @@ -498,7 +498,7 @@ def calibrate_partition( # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict["threshold"], @@ -521,7 +521,7 @@ def calibrate_partition( tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -543,7 +543,7 @@ def calibrate_partition( object_dict, inplots_dict, timestamp, - args.metadata, + chmap, args.configs, args.channel, args.datatype, diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 790ee0a..495c87b 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -18,6 +18,7 @@ get_tcm_pulser_ids, ) from pygama.pargen.utils import 
load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) @@ -28,6 +29,7 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument( "--tcm_filelist", help="tcm_filelist", nargs="*", type=str, required=False ) @@ -39,12 +41,13 @@ ) argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) argparser.add_argument( "--save_path", @@ -62,6 +65,10 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] @@ -85,8 +92,8 @@ if args.overwrite_files: overwrite = Props.read_from(args.overwrite_files) - if args.channel in overwrite: - overwrite = overwrite[args.channel]["pars"]["operations"] + if channel in overwrite: + overwrite = overwrite[channel]["pars"]["operations"] else: overwrite = None else: @@ -111,15 +118,15 @@ if len(fft_files) > 0: fft_fields = get_keys( [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(fft_files[0], f"{args.channel}/dsp/") + key.replace(f"{channel}/dsp/", "") + for key in ls(fft_files[0], f"{channel}/dsp/") ], kwarg_dict_fft["cut_parameters"], ) fft_data = load_data( fft_files, - f"{args.channel}/dsp", + f"{channel}/dsp", {}, [*fft_fields, "timestamp", "trapTmax", "t_sat_lo"], ) @@ -184,26 +191,20 @@ kwarg_dict_cal = kwarg_dict["cal_fields"] cut_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], kwarg_dict_cal["cut_parameters"], ) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] cut_fields += get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], init_cal["cut_parameters"], ) # load data in data, threshold_mask = load_data( cal_files, - f"{args.channel}/dsp", + f"{channel}/dsp", {}, [*cut_fields, "timestamp", "trapTmax", "t_sat_lo"], threshold=kwarg_dict_cal.get("threshold", 0), @@ -226,7 +227,7 @@ tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, total_mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -303,6 +304,8 @@ hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal} plot_dict = {**plot_dict_fft, 
**plot_dict_init_cal, **plot_dict_cal} + hit_dict = convert_dict_np_to_float(hit_dict) + for file in args.save_path: Path(file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(file, hit_dict) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 48f3d9f..4f87afb 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -17,6 +17,7 @@ generate_cut_classifiers, get_keys, ) +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) @@ -28,12 +29,13 @@ argparser.add_argument("--phy_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) argparser.add_argument( "--save_path", @@ -51,6 +53,10 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] @@ -88,15 +94,12 @@ kwarg_dict_fft = kwarg_dict["fft_fields"] cut_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(phy_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(phy_files[0], f"{channel}/dsp/")], kwarg_dict_fft["cut_parameters"], ) data = sto.read( - f"{args.channel}/dsp/", + f"{channel}/dsp/", phy_files, field_mask=[*cut_fields, "daqenergy", "t_sat_lo", "timestamp"], idx=np.where(bl_mask)[0], @@ -145,6 +148,8 @@ log.debug("fft cuts applied") log.debug(f"cut_dict is: {json.dumps(hit_dict, indent=2)}") + hit_dict = convert_dict_np_to_float(hit_dict) + for file in args.save_path: Path(file).name.mkdir(parents=True, exist_ok=True) Props.write_to(file, {"pars": {"operations": hit_dict}}) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index 27c1101..9e6ad42 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -10,6 +10,7 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log file", type=str) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -36,6 +37,10 @@ config_dict = configs.on(args.timestamp, system=args.datatype) kwarg_dict = config_dict["snakemake_rules"]["pars_tcm_pulser"]["inputs"]["pulser_config"] +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid}" + kwarg_dict = Props.read_from(kwarg_dict) if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": @@ -46,9 +51,7 @@ tcm_files = args.tcm_files # get pulser mask from tcm files 
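Several scripts touched here accept either explicit data files or a single *.filelist text file naming them one per line; the tcm pulser hunk here and the QC scripts earlier both expand such an argument before sorting and de-duplicating it. A small sketch of that convention (the file name in the usage comment is hypothetical):

    from pathlib import Path


    def expand_filelist(files):
        """Expand a single ".filelist" argument into the paths it lists.

        Mirrors the pattern in pars_tcm_pulser.py / pars_hit_qc.py: a
        one-element list whose entry ends in ".filelist" is read line by
        line, anything else is passed through unchanged.
        """
        if len(files) == 1 and Path(files[0]).suffix == ".filelist":
            return Path(files[0]).read_text().splitlines()
        return list(files)


    # usage sketch (hypothetical filelist):
    # tcm_files = sorted(set(expand_filelist(["all-l200-p03-r000-cal-tier_tcm.filelist"])))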
tcm_files = sorted(np.unique(tcm_files)) -ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") -) +ids, mask = get_tcm_pulser_ids(tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold")) Path(args.pulser_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.pulser_file, {"idxs": ids.tolist(), "mask": mask.tolist()}) diff --git a/scripts/util/convert_np.py b/scripts/util/convert_np.py new file mode 100644 index 0000000..cdc363c --- /dev/null +++ b/scripts/util/convert_np.py @@ -0,0 +1,14 @@ +import numpy as np + + +def convert_dict_np_to_float(dic): + for key in dic: + if isinstance(dic[key], dict): + convert_dict_np_to_float(dic[key]) + elif isinstance(dic[key], (np.float32, np.float64)): + dic[key] = float(dic[key]) + elif isinstance(dic[key], (list, tuple)): + dic[key] = [ + float(x) if isinstance(x, (np.float32, np.float64)) else x for x in dic[key] + ] + return dic From 4f7e4058bac3836a303cb6b0ceb06cf484c30d07 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 4 Dec 2024 17:40:05 +0100 Subject: [PATCH 014/101] debugging --- rules/ann.smk | 101 ++++++++++++++--------- rules/dsp.smk | 165 +++++++++++++++++++------------------- rules/evt.smk | 142 +++++++++++++++++++++----------- rules/psp.smk | 1 + scripts/build_ann.py | 124 ---------------------------- scripts/build_dsp.py | 150 +++++++++++++++++++++------------- scripts/build_hit.py | 31 ++++--- scripts/build_tcm.py | 16 +++- scripts/merge_channels.py | 6 +- scripts/pars_dsp_tau.py | 28 +++---- scripts/pars_hit_lq.py | 2 +- 11 files changed, 380 insertions(+), 386 deletions(-) delete mode 100644 scripts/build_ann.py diff --git a/rules/ann.smk b/rules/ann.smk index 64cdd50..15558ae 100644 --- a/rules/ann.smk +++ b/rules/ann.smk @@ -4,51 +4,72 @@ to apply the ann and risetime cuts for psd. 
""" -from scripts.util.pars_loading import pars_catalog -from scripts.util.utils import par_dsp_path from scripts.util.patterns import ( - get_pattern_tier_dsp, - get_pattern_tier_psp, - get_pattern_tier_ann, get_pattern_tier, get_pattern_log, get_pattern_pars, - get_pattern_pars_overwrite, ) -for tier in ["ann", "pan"]: - rule: - input: - dsp_file=( - get_pattern_tier_dsp(setup) - if tier == "ann" - else get_pattern_tier_psp(setup) - ), - pars_file=lambda wildcards: get_svm_file(wildcards, "ann", "cuts"), - params: - timestamp="{timestamp}", - datatype="{datatype}", - output: - tier_file=get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, f"{tier}_db"), - log: - get_pattern_log(setup, f"tier_{tier}"), - group: - "tier-ann" - resources: - runtime=300, - mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, - shell: - "{swenv} python3 -B " - f"{workflow.source_path('../scripts/build_ann.py')} " - "--log {log} " - "--configs {configs} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--input {input.dsp_file} " - "--output {output.tier_file} " - "--db_file {output.db_file} " - "--pars_file {input.pars_file} " +rule build_ann: + input: + dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), + pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + output: + tier_file=get_pattern_tier(setup, "ann", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(setup, "ann_db"), + log: + get_pattern_log(setup, "tier_ann"), + group: + "tier-ann" + resources: + runtime=300, + mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/build_dsp.py')} " + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + f"--tier ann " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--input {input.dsp_file} " + "--output {output.tier_file} " + "--db_file {output.db_file} " + "--pars_file {input.pars_file} " - set_last_rule_name(workflow, f"build_{tier}") + +rule build_pan: + input: + dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False), + pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + output: + tier_file=get_pattern_tier(setup, "pan", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(setup, "pan_db"), + log: + get_pattern_log(setup, "tier_pan"), + group: + "tier-ann" + resources: + runtime=300, + mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/build_dsp.py')} " + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + f"--tier pan " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--input {input.dsp_file} " + "--output {output.tier_file} " + "--db_file {output.db_file} " + "--pars_file {input.pars_file} " diff --git a/rules/dsp.smk b/rules/dsp.smk index 34f7422..7ae67a7 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -363,86 +363,85 @@ rule build_pars_dsp_db: "--channelmap {meta} " -rule build_pars_dsp: - input: - in_files=lambda wildcards: get_par_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - name="dplms", - extension="lh5", - ), - in_db=get_pattern_pars_tmp( 
- setup, - "dsp", - datatype="cal", - ), - plts=get_pattern_plts(setup, "dsp"), - objects=get_pattern_pars( - setup, - "dsp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - params: - timestamp="{timestamp}", - datatype="cal", - output: - out_file=get_pattern_pars( - setup, - "dsp", - extension="lh5", - check_in_cycle=check_in_cycle, - ), - out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), - group: - "merge-dsp" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--output {output.out_file} " - "--in_db {input.in_db} " - "--out_db {output.out_db} " - "--input {input.in_files} " - "--timestamp {params.timestamp} " - "--channelmap {meta} " - - -rule build_dsp: - input: - raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), - pars_file=ancient( - lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "dsp" - ) - ), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, - output: - tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "dsp_db"), - log: - get_pattern_log(setup, "tier_dsp"), - group: - "tier-dsp" - resources: - runtime=300, - mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_dsp.py " - "--log {log} " - f"--configs {ro(configs)} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--input {params.ro_input[raw_file]} " - "--output {output.tier_file} " - "--db_file {output.db_file} " - "--pars_file {params.ro_input[pars_file]} " +# rule build_pars_dsp: +# input: +# in_files=lambda wildcards: get_par_chanlist( +# setup, +# f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", +# "dsp", +# basedir, +# det_status, +# chan_maps, +# name="dplms", +# extension="lh5", +# ), +# in_db=get_pattern_pars_tmp( +# setup, +# "dsp", +# datatype="cal", +# ), +# plts=get_pattern_plts(setup, "dsp"), +# objects=get_pattern_pars( +# setup, +# "dsp", +# name="objects", +# extension="dir", +# check_in_cycle=check_in_cycle, +# ), +# params: +# timestamp="{timestamp}", +# datatype="cal", +# output: +# out_file=get_pattern_pars( +# setup, +# "dsp", +# extension="lh5", +# check_in_cycle=check_in_cycle, +# ), +# out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), +# group: +# "merge-dsp" +# shell: +# "{swenv} python3 -B " +# "{basedir}/../scripts/merge_channels.py " +# "--output {output.out_file} " +# "--in_db {input.in_db} " +# "--out_db {output.out_db} " +# "--input {input.in_files} " +# "--timestamp {params.timestamp} " +# "--channelmap {meta} " +# rule build_dsp: +# input: +# raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), +# pars_file=ancient( +# lambda wildcards: ParsCatalog.get_par_file( +# setup, wildcards.timestamp, "dsp" +# ) +# ), +# params: +# timestamp="{timestamp}", +# datatype="{datatype}", +# ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, +# output: +# tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), +# db_file=get_pattern_pars_tmp(setup, "dsp_db"), +# log: +# get_pattern_log(setup, "tier_dsp"), +# group: +# "tier-dsp" +# resources: +# runtime=300, +# mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, +# shell: +# "{swenv} python3 -B " +# "{basedir}/../scripts/build_dsp.py " +# "--log {log} " +# 
"--tier dsp " +# f"--configs {ro(configs)} " +# "--metadata {meta} " +# "--datatype {params.datatype} " +# "--timestamp {params.timestamp} " +# "--input {params.ro_input[raw_file]} " +# "--output {output.tier_file} " +# "--db_file {output.db_file} " +# "--pars_file {params.ro_input[pars_file]} " diff --git a/rules/evt.smk b/rules/evt.smk index 9239b96..112c92c 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -11,50 +11,91 @@ from scripts.util.patterns import ( ) -for tier in ("evt", "pet"): +rule build_evt: + input: + dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), + hit_file=get_pattern_tier(setup, "hit", check_in_cycle=False), + tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + ann_file=lambda wildcards: ( + None + if int(wildcards["period"][1:]) > 11 + else get_pattern_tier(setup, "ann", check_in_cycle=False) + ), + par_files=lambda wildcards: ParsCatalog.get_par_file( + setup, wildcards.timestamp, "hit" + ), + xtalk_matrix=lambda wildcards: get_input_par_file( + tier="evt", wildcards=wildcards, name="xtc" + ), + output: + get_pattern_tier(setup, "evt", check_in_cycle=check_in_cycle), + params: + timestamp="{timestamp}", + datatype="{datatype}", + tier="evt", + ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, + log: + get_pattern_log(setup, f"tier_evt"), + group: + "tier-evt" + resources: + runtime=300, + mem_swap=50, + run: + shell_string = ( + f"{swenv} python3 -B " + f"{basedir}/../scripts/build_evt.py " + f"--configs {ro(configs)} " + f"--metadata {ro(meta)} " + "--log {log} " + "--tier {params.tier} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--xtc_file {params.ro_input[xtalk_matrix]} " + "--par_files {params.ro_input[par_files]} " + "--hit_file {params.ro_input[hit_file]} " + "--tcm_file {params.ro_input[tcm_file]} " + "--dsp_file {params.ro_input[dsp_file]} " + "--output {output} " + ) + if input.ann_file is not None: + shell_string += "--ann_file {params.ro_input[ann_file]} " - rule: - input: - dsp_file=( - get_pattern_tier(setup, "dsp", check_in_cycle=False) - if tier == "evt" - else get_pattern_tier(setup, "psp", check_in_cycle=False) - ), - hit_file=( - get_pattern_tier(setup, "hit", check_in_cycle=False) - if tier == "evt" - else get_pattern_tier(setup, "pht", check_in_cycle=False) - ), - tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), - xtalk_matrix=lambda wildcards: get_input_par_file( - tier=tier, wildcards=wildcards, name="xtc" - ), - ann_file=branch( - lambda wildcards: tier if wildcards["period"][1:] <= 11 else "none", - cases={ - "evt": get_pattern_tier(setup, "ann", check_in_cycle=False), - "pet": get_pattern_tier(setup, "pan", check_in_cycle=False), - "none": None, - }, - ), - par_files=lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "pht" - ), - output: - get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), - params: - timestamp="{timestamp}", - datatype="{datatype}", - tier=tier, - ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, - log: - get_pattern_log(setup, f"tier_{tier}"), - group: - "tier-evt" - resources: - runtime=300, - mem_swap=50, - shell: + shell(shell_string) + + +rule build_pet: + input: + dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False), + hit_file=get_pattern_tier(setup, "pht", check_in_cycle=False), + tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + ann_file=lambda wildcards: ( + None + if int(wildcards["period"][1:]) > 11 + else get_pattern_tier(setup, "pan", 
check_in_cycle=False) + ), + par_files=lambda wildcards: ParsCatalog.get_par_file( + setup, wildcards.timestamp, "pht" + ), + xtalk_matrix=lambda wildcards: get_input_par_file( + tier="pet", wildcards=wildcards, name="xtc" + ), + output: + get_pattern_tier(setup, "pet", check_in_cycle=check_in_cycle), + params: + timestamp="{timestamp}", + datatype="{datatype}", + tier="pet", + ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, + log: + get_pattern_log(setup, f"tier_pet"), + group: + "tier-evt" + resources: + runtime=300, + mem_swap=50, + run: + shell_string = ( f"{swenv} python3 -B " f"{basedir}/../scripts/build_evt.py " f"--configs {ro(configs)} " @@ -68,10 +109,15 @@ for tier in ("evt", "pet"): "--hit_file {params.ro_input[hit_file]} " "--tcm_file {params.ro_input[tcm_file]} " "--dsp_file {params.ro_input[dsp_file]} " - "--ann_file {params.ro_input[ann_file]} " "--output {output} " + ) + if input.ann_file is not None: + shell_string += "--ann_file {params.ro_input[ann_file]} " + + shell(shell_string) + - set_last_rule_name(workflow, f"build_{tier}") +for evt_tier in ("evt", "pet"): rule: wildcard_constraints: @@ -87,14 +133,14 @@ for tier in ("evt", "pet"): ) ), output: - get_pattern_tier(setup, f"{tier}_concat", check_in_cycle=check_in_cycle), + get_pattern_tier(setup, f"{evt_tier}_concat", check_in_cycle=check_in_cycle), params: timestamp="all", datatype="{datatype}", lh5concat_exe=setup["paths"]["install"] + "/bin/lh5concat", ro_input=lambda _, input: utils.as_ro(setup, input), log: - get_pattern_log_concat(setup, f"tier_{tier}_concat"), + get_pattern_log_concat(setup, f"tier_{evt_tier}_concat"), group: "tier-evt" shell: @@ -102,4 +148,4 @@ for tier in ("evt", "pet"): "--output {output} " "-- {params.ro_input} &> {log}" - set_last_rule_name(workflow, f"concat_{tier}") + set_last_rule_name(workflow, f"concat_{evt_tier}") diff --git a/rules/psp.smk b/rules/psp.smk index 260be19..9fc0861 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -374,6 +374,7 @@ rule build_psp: "{swenv} python3 -B " "{basedir}/../scripts/build_dsp.py " "--log {log} " + "--tier psp " f"--configs {ro(configs)} " "--metadata {meta} " "--datatype {params.datatype} " diff --git a/scripts/build_ann.py b/scripts/build_ann.py deleted file mode 100644 index 224877a..0000000 --- a/scripts/build_ann.py +++ /dev/null @@ -1,124 +0,0 @@ -import argparse -import json -import logging -import os -import pathlib -import re -import time -import warnings - -os.environ["LGDO_CACHE"] = "false" -os.environ["LGDO_BOUNDSCHECK"] = "false" -os.environ["DSPEED_CACHE"] = "false" -os.environ["DSPEED_BOUNDSCHECK"] = "false" - -import lgdo.lh5 as lh5 -import numpy as np -from dspeed import build_dsp -from legendmeta import LegendMetadata -from legendmeta.catalog import Props - - -def replace_list_with_array(dic): - for key, value in dic.items(): - if isinstance(value, dict): - dic[key] = replace_list_with_array(value) - elif isinstance(value, list): - dic[key] = np.array(value, dtype="float32") - else: - pass - return dic - - -warnings.filterwarnings(action="ignore", category=RuntimeWarning) - -argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs path", type=str, required=True) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) -argparser.add_argument("--log", help="log file", type=str) 
-argparser.add_argument("--input", help="input file", type=str) -argparser.add_argument("--output", help="output file", type=str) -argparser.add_argument("--db_file", help="db file", type=str) -args = argparser.parse_args() - -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -log = logging.getLogger(__name__) - -configs = LegendMetadata(path=args.configs) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_ann"][ - "inputs" -]["processing_chain"] - -channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} -db_files = [ - par_file - for par_file in args.pars_file - if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yml" -] - -database_dic = Props.read_from(db_files, subst_pathvar=True) -database_dic = replace_list_with_array(database_dic) - -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) - -rng = np.random.default_rng() -rand_num = f"{rng.integers(0,99999):05d}" -temp_output = f"{args.output}.{rand_num}" - -start = time.time() - -build_dsp( - args.input, - temp_output, - {}, - database=database_dic, - chan_config=channel_dict, - write_mode="r", - buffer_len=3200 if args.datatype == "cal" else 3200, - block_width=16, -) - -log.info(f"build_ann finished in {time.time()-start}") - -os.rename(temp_output, args.output) - -if "ann" in args.output: - key = os.path.basename(args.output).replace("-tier_ann.lh5", "") -else: - key = os.path.basename(args.output).replace("-tier_pan.lh5", "") - -raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] - -raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] - -outputs = {} -channels = [] -for channel, chan_dict in channel_dict.items(): - output = chan_dict["outputs"] - in_dict = False - for entry in outputs: - if outputs[entry]["fields"] == output: - outputs[entry]["channels"].append(channel.split("/")[0]) - in_dict = True - if in_dict is False: - outputs[f"group{len(list(outputs))+1}"] = { - "channels": [channel.split("/")[0]], - "fields": output, - } - channels.append(channel.split("/")[0]) - -full_dict = { - "valid_fields": { - "ann": outputs, - }, - "valid_keys": {key: {"valid_channels": {"ann": channels}}}, -} -pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) -with open(args.db_file, "w") as w: - json.dump(full_dict, w, indent=4) diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 902ac4b..c505058 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -1,9 +1,10 @@ import argparse import logging -import pathlib +import logging.config import re import time import warnings +from pathlib import Path import numpy as np from dspeed import build_dsp @@ -32,6 +33,7 @@ def replace_list_with_array(dic): argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--tier", help="Tier", type=str, required=True) argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) argparser.add_argument("--input", help="input file", type=str) @@ -40,35 +42,49 @@ def replace_list_with_array(dic): 
argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -pathlib.Path(args.log).parent.mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) -log = logging.getLogger(__name__) +configs = TextDB(args.configs, lazy=True) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] +if args.tier in ["dsp", "psp"]: + config_dict = config_dict["tier_dsp"] +elif args.tier in ["ann", "pan"]: + config_dict = config_dict["tier_ann"] +else: + msg = f"Tier {args.tier} not supported" + raise ValueError(msg) + +channel_dict = config_dict["inputs"]["processing_chain"] +settings_dict = config_dict["options"].get("settings", {}) +if isinstance(settings_dict, str): + settings_dict = Props.read_from(settings_dict) +log_config = config_dict["options"]["logging"] + +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") meta = LegendMetadata(path=args.metadata) chan_map = meta.channelmap(args.timestamp, system=args.datatype) - -configs = TextDB(args.configs, lazy=True) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_dsp"][ - "inputs" -]["processing_chain"] - -channel_dict = { - f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) - for chan, file in channel_dict.items() -} +if args.tier in ["ann", "pan"]: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/dsp": Props.read_from(file) + for chan, file in channel_dict.items() + } +else: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) + for chan, file in channel_dict.items() + } db_files = [ - par_file for par_file in args.pars_file if pathlib.Path(par_file).suffix in (".json", ".yaml") + par_file for par_file in args.pars_file if Path(par_file).suffix in (".json", ".yaml", ".yml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) database_dic = replace_list_with_array(database_dic) -pathlib.Path(args.output).parent.mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) rng = np.random.default_rng() rand_num = f"{rng.integers(0, 99999):05d}" @@ -83,42 +99,66 @@ def replace_list_with_array(dic): database=database_dic, chan_config=channel_dict, write_mode="r", - buffer_len=3200 if args.datatype == "cal" else 3200, - block_width=16, + buffer_len=settings_dict.get("buffer_len", 1000), + block_width=settings_dict.get("block_width", 16), ) log.info(f"build_dsp finished in {time.time()-start}") - -pathlib.Path(temp_output).rename(args.output) - -key = pathlib.Path(args.output).name.replace("-tier_dsp.lh5", "") - -raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] - -raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] - -outputs = {} -channels = [] -for channel, chan_dict in channel_dict.items(): - output = chan_dict["outputs"] - in_dict = False - for entry in outputs: - if outputs[entry]["fields"] == output: - outputs[entry]["channels"].append(channel.split("/")[0]) - in_dict = True - if in_dict is False: - outputs[f"group{len(list(outputs))+1}"] = { - 
"channels": [channel.split("/")[0]], - "fields": output, - } - channels.append(channel.split("/")[0]) - -full_dict = { - "valid_fields": { - "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, - "dsp": outputs, - }, - "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, -} -pathlib.Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) +Path(temp_output).rename(args.output) + +key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") + +if args.tier in ["dsp", "psp"]: + + raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] + raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] + + outputs = {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, + "dsp": outputs, + }, + "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, + } +else: + outputs = {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "ann": outputs, + }, + "valid_keys": {key: {"valid_channels": {"ann": channels}}}, + } + +Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.db_file, full_dict) diff --git a/scripts/build_hit.py b/scripts/build_hit.py index 8e2da80..3aba4aa 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -3,7 +3,7 @@ import time from pathlib import Path -from legendmeta import TextDB +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 from pygama.hit.build_hit import build_hit @@ -13,12 +13,13 @@ argparser.add_argument("--pars_file", help="hit pars file", nargs="*") argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--tier", help="Tier", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--output", help="output file", type=str) argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() @@ -41,21 +42,27 @@ msg = "unknown tier" raise ValueError(msg) -pars_dict = Props.read_from(args.pars_file) +meta = LegendMetadata(path=args.metadata) +chan_map = meta.channelmap(args.timestamp, system=args.datatype) +pars_dict = Props.read_from(args.pars_file) pars_dict = {chan: chan_dict["pars"] for chan, chan_dict in pars_dict.items()} 
hit_dict = {} channels_present = lh5.ls(args.input) for channel in pars_dict: chan_pars = pars_dict[channel].copy() - if channel in channel_dict: - cfg_dict = Props.read_from(channel_dict[channel]) - Props.add_to(cfg_dict, chan_pars) - chan_pars = cfg_dict - - if channel in channels_present: - hit_dict[f"{channel}/dsp"] = chan_pars + try: + detector = chan_map.map("daq.rawid")[int(channel[2:])].name + if detector in channel_dict: + cfg_dict = Props.read_from(channel_dict[detector]) + Props.add_to(cfg_dict, chan_pars) + chan_pars = cfg_dict + + if channel in channels_present: + hit_dict[f"{channel}/dsp"] = chan_pars + except KeyError: + pass t_start = time.time() Path(args.output).parent.mkdir(parents=True, exist_ok=True) @@ -79,7 +86,7 @@ } hit_channels.append(channel) -key = Path(args.output).replace(f"-tier_{args.tier}.lh5", "") +key = args.output.replace(f"-tier_{args.tier}.lh5", "") full_dict = { "valid_fields": {args.tier: hit_outputs}, diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index 2ceb3ab..faa39d6 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -1,5 +1,6 @@ import argparse import logging +import logging.config from pathlib import Path import lgdo.lh5 as lh5 @@ -18,13 +19,20 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["tier_tcm"] +log_config = config_dict["options"]["logging"] + +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") Path(args.output).parent.mkdir(parents=True, exist_ok=True) -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -channel_dict = configs["snakemake_rules"]["tier_tcm"]["inputs"] -settings = Props.read_from(channel_dict["config"]) + +settings = Props.read_from(config_dict["inputs"]["config"]) rng = np.random.default_rng() temp_output = f"{args.output}.{rng.integers(0, 99999):05d}" diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index 5fb6d68..bed04d2 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -76,7 +76,7 @@ def replace_path(d, old_path, new_path): Path(args.output).parent.mkdir(parents=True, exist_ok=True) -if file_extension == ".json" or file_extension == ".yaml" or file_extension == ".yml": +if file_extension in (".json", ".yaml", ".yml"): out_dict = {} for channel in channel_files: if Path(channel).suffix == file_extension: @@ -92,9 +92,7 @@ def replace_path(d, old_path, new_path): msg = "Output file extension does not match input file extension" raise RuntimeError(msg) - Props.write_to(temp_output, out_dict, "json") - - Path(temp_output).rename(out_file) + Props.write_to(out_file, out_dict) elif file_extension == ".pkl": out_dict = {} diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index b584648..b8d9a71 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -1,5 +1,6 @@ import argparse import logging +import logging.config import pickle as pkl from pathlib import Path @@ -29,27 +30,24 @@ argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str, required=False) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") 
-logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) - sto = lh5.LH5Store() -log = logging.getLogger(__name__) + +configs = LegendMetadata(path=args.configs) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["pars_dsp_tau"] +log_config = config_dict["options"]["logging"] + +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -configs = LegendMetadata(path=args.configs) -config_dict = configs.on(args.timestamp, system=args.datatype) -channel_dict = config_dict["snakemake_rules"]["pars_dsp_tau"]["inputs"]["processing_chain"][ - args.channel -] -kwarg_dict = config_dict["snakemake_rules"]["pars_dsp_tau"]["inputs"]["tau_config"][args.channel] +channel_dict = config_dict["inputs"]["processing_chain"][args.channel] +kwarg_dict = config_dict["inputs"]["tau_config"][args.channel] kwarg_dict = Props.read_from(kwarg_dict) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 169b560..8625ed3 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -27,7 +27,7 @@ def get_results_dict(lq_class): "cal_energy_param": lq_class.cal_energy_param, "DEP_means": lq_class.timecorr_df.to_dict("index"), "rt_correction": lq_class.dt_fit_pars, - "cut_fit_pars": lq_class.cut_fit_pars, + "cut_fit_pars": lq_class.cut_fit_pars.to_dict(), "cut_value": lq_class.cut_val, "sfs": lq_class.low_side_sf.to_dict("index"), } From a2f2d7eb7d850f7ae90c2c75835521fd96845a06 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Dec 2024 16:44:30 +0000 Subject: [PATCH 015/101] style: pre-commit fixes --- rules/filelist_gen.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index d0356a8..c90c570 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -220,7 +220,7 @@ def get_filelist( wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): file_selection = wildcards.label.split("-", 1)[0] - keypart = f'-{wildcards.label.split("-", 1)[1]}' # remove the file selection from the keypart + keypart = f'-{wildcards.label.split("-",1)[1]}' # remove the file selection from the keypart analysis_runs, ignore_keys = get_analysis_runs( ignore_keys_file, analysis_runs_file, file_selection ) From ce2ad8526e7aad37ec8ff5e38e982d45daa3f120 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 5 Dec 2024 14:46:29 +0100 Subject: [PATCH 016/101] add isotopes where lines are from --- scripts/pars_pht_partcal.py | 56 ++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index a2d74e4..7b6a4ed 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -166,34 +166,34 @@ def calibrate_partition( # calibrate pk_pars = [ - # (238.632, (10, 10), pgf.gauss_on_step), #double line - # (241.0, (10, 10), pgf.gauss_on_step), 
#double line - (277.371, (10, 7), pgf.gauss_on_linear), - (288.2, (7, 10), pgf.gauss_on_linear), - (300.1, (10, 10), pgf.gauss_on_linear), - (453.0, (10, 10), pgf.gauss_on_linear), - # (511, (20, 20), pgf.gauss_on_step), double line - (549.8, (10, 10), pgf.gauss_on_linear), - (583.187, (20, 20), pgf.hpge_peak), - (727.330, (20, 20), pgf.hpge_peak), - (763.13, (20, 10), pgf.gauss_on_linear), - (785.37, (10, 20), pgf.gauss_on_linear), - (860.557, (20, 20), pgf.hpge_peak), - (893.408, (20, 20), pgf.gauss_on_linear), - (927.6, (20, 20), pgf.gauss_on_linear), - (952.120, (20, 20), pgf.gauss_on_linear), - (982.7, (20, 20), pgf.gauss_on_linear), - (1078.62, (20, 7), pgf.gauss_on_linear), - (1093.9, (7, 20), pgf.gauss_on_linear), - (1512.7, (20, 20), pgf.gauss_on_linear), - (1592.511, (20, 20), pgf.hpge_peak), - (1620.50, (20, 20), pgf.hpge_peak), - (1679.7, (20, 20), pgf.gauss_on_linear), - (1806.0, (20, 20), pgf.gauss_on_linear), - (2103.511, (20, 20), pgf.hpge_peak), - (2614.511, (40, 20), pgf.hpge_peak), - (3125.511, (20, 20), pgf.gauss_on_linear), - (3197.7, (20, 20), pgf.gauss_on_linear), + # (238.632, (10, 10), pgf.gauss_on_step), #double line, Pb-212 + # (240.986, (10, 10), pgf.gauss_on_step), #double line, Ra-224 + (277.371, (10, 7), pgf.gauss_on_linear), # Tl-208 + (288.2, (7, 10), pgf.gauss_on_linear), # Bi-212 + (300.087, (10, 10), pgf.gauss_on_linear), # Pb-212 + (452.98, (10, 10), pgf.gauss_on_linear), # Bi-212 + # (511, (20, 20), pgf.gauss_on_step), double line, #e+e- + (549.73, (10, 10), pgf.gauss_on_linear), # Rn-220 + (583.187, (20, 20), pgf.hpge_peak), # Tl-208 + (727.330, (20, 20), pgf.hpge_peak), # Bi-212 + (763.13, (20, 10), pgf.gauss_on_linear), # Tl-208 + (785.37, (10, 20), pgf.gauss_on_linear), # Bi-212 + (860.557, (20, 20), pgf.hpge_peak), # Tl-208 + (893.408, (20, 20), pgf.gauss_on_linear), # Bi-212 + (927.6, (20, 20), pgf.gauss_on_linear), # Tl-208 + (952.120, (20, 20), pgf.gauss_on_linear), # Bi-212 + (982.7, (20, 20), pgf.gauss_on_linear), # Tl-208 + (1078.62, (20, 7), pgf.gauss_on_linear), # Bi-212 + (1093.9, (7, 20), pgf.gauss_on_linear), # Tl-208 + (1512.7, (20, 20), pgf.gauss_on_linear), # Bi-212 + (1592.511, (20, 20), pgf.hpge_peak), # Tl-208 DEP + (1620.50, (20, 20), pgf.hpge_peak), # Bi-212 + (1679.7, (20, 20), pgf.gauss_on_linear), # Bi-212 + (1806.0, (20, 20), pgf.gauss_on_linear), # Bi-212 + (2103.511, (20, 20), pgf.hpge_peak), # Tl-208 SEP + (2614.511, (40, 20), pgf.hpge_peak), # Tl-208 + (3125.511, (20, 20), pgf.gauss_on_linear), # Summation + (3197.7, (20, 20), pgf.gauss_on_linear), # Summation (3475.1, (20, 20), pgf.gauss_on_linear), ] From 2deac35ff8c30a90eb13835d7f8e0e447ef803e4 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 5 Dec 2024 21:03:13 +0100 Subject: [PATCH 017/101] choose ctc based on no_ctc energy instead --- scripts/pars_hit_ecal.py | 2 +- scripts/pars_pht_partcal.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index c94041d..43ba644 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -636,7 +636,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): hit_dict.update( { cal_energy_param.replace("_ctc", ""): { - "expression": f"where({cal_energy_param}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", + "expression": f"where({cal_energy_param.replace('ctc','noctc')}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, 
{cal_energy_param.replace('ctc','noctc')})", "parameters": {}, } } diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 7b6a4ed..a454d76 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -308,7 +308,7 @@ def calibrate_partition( cal_dicts, { cal_energy_param.replace("_ctc", ""): { - "expression": f"where({cal_energy_param}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", + "expression": f"where({cal_energy_param.replace('ctc', 'noctc')}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", "parameters": {}, } }, From 97a0f8e9f9948c307121d994c3e29d49f46137c3 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 26 Dec 2024 18:47:31 +0100 Subject: [PATCH 018/101] Fix a bunch of docs things --- .gitignore | 2 ++ .readthedocs.yaml | 19 +++++++++++ docs/source/conf.py | 68 +++++++++++++++++++++++++++++++++++++ docs/source/developer.rst | 28 ++++++++------- docs/source/index.rst | 21 +++++++----- docs/source/user_manual.rst | 55 +++++++++++++++++------------- 6 files changed, 149 insertions(+), 44 deletions(-) create mode 100644 .readthedocs.yaml create mode 100644 docs/source/conf.py diff --git a/.gitignore b/.gitignore index b9905f2..90d9198 100644 --- a/.gitignore +++ b/.gitignore @@ -113,3 +113,5 @@ venv.bak/ # mypy .mypy_cache/ + +docs/source/api diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..afc42e1 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,19 @@ +version: 2 + +sphinx: + configuration: docs/source/conf.py + +build: + os: "ubuntu-22.04" + tools: + python: "3.12" + commands: + # FIXME: dependencies should not be explicitly listed here! + - pip install snakemake pygama pylegendmeta sphinx-inline-tabs sphinx-copybutton furo myst-parser + - rm -rf docs/source/api + - sphinx-apidoc + --private + --module-first + --force + --output-dir docs/source/api + scripts diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..013e65b --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,68 @@ +# Configuration file for the Sphinx documentation builder. 
+from __future__ import annotations + +import sys +from pathlib import Path + +sys.path.insert(0, Path(__file__).parents[2].resolve().as_posix() / "scripts") + +project = "legend-dataflow" +copyright = "2024, the LEGEND Collaboration" + +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.mathjax", + "sphinx.ext.napoleon", + "sphinx.ext.intersphinx", + "sphinx_copybutton", + "sphinx_inline_tabs", + "myst_parser", + "IPython.sphinxext.ipython_console_highlighting", +] + +source_suffix = { + ".rst": "restructuredtext", + ".md": "markdown", +} +master_doc = "index" + +# Furo theme +html_theme = "furo" +html_theme_options = { + "source_repository": "https://github.com/legend-exp/legend-dataflow", + "source_branch": "main", + "source_directory": "docs/source", +} +html_title = f"{project}" + +# sphinx-napoleon +# enforce consistent usage of NumPy-style docstrings +napoleon_numpy_docstring = True +napoleon_google_docstring = False +napoleon_use_ivar = True +napoleon_use_rtype = False + +# intersphinx +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "numpy": ("https://numpy.org/doc/stable", None), + "awkward": ("https://awkward-array.org/doc/stable", None), + "numba": ("https://numba.readthedocs.io/en/stable", None), + "pandas": ("https://pandas.pydata.org/docs", None), + "h5py": ("https://docs.h5py.org/en/stable", None), + "pint": ("https://pint.readthedocs.io/en/stable", None), + "hist": ("https://hist.readthedocs.io/en/latest", None), + "dspeed": ("https://dspeed.readthedocs.io/en/stable", None), + "daq2lh5": ("https://legend-daq2lh5.readthedocs.io/en/stable", None), + "lgdo": ("https://legend-pydataobj.readthedocs.io/en/stable", None), + "dbetto": ("https://dbetto.readthedocs.io/en/stable", None), + "pylegendmeta": ("https://pylegendmeta.readthedocs.io/en/stable", None), +} # add new intersphinx mappings here + +# sphinx-autodoc +autodoc_default_options = {"ignore-module-all": True} +# Include __init__() docstring in class docstring +autoclass_content = "both" +autodoc_typehints = "description" +autodoc_typehints_description_target = "documented_params" +autodoc_typehints_format = "short" diff --git a/docs/source/developer.rst b/docs/source/developer.rst index b6d7560..fa8db0e 100644 --- a/docs/source/developer.rst +++ b/docs/source/developer.rst @@ -1,15 +1,19 @@ Developers Guide -=============== +================ -Snakemake is configured around a series of rules which specify how to generate a file/files from a set of input files. -These rules are defined in the ``Snakefile`` and in the files in the ``rules`` directory. -In general the structure is that a series of rules are defined to run on some calibration data generation -a final ``par_{tier}.yaml`` file at the end which can be used by the ``tier``` rule to generate all the files in the tier. -For most rules there are 2 versions the basic version and the partition version where the first uses a single run -while the latter will group many runs together. -This grouping is defined in the ``cal_grouping.yaml`` file in the `legend-datasets `_ repository. +Snakemake is configured around a series of rules which specify how to generate +a file/files from a set of input files. These rules are defined in the +``Snakefile`` and in the files in the ``rules`` directory. In general the +structure is that a series of rules are defined to run on some calibration data +generation a final ``par_{tier}.yaml`` file at the end which can be used by the +``tier``` rule to generate all the files in the tier. 
For most rules there are +2 versions the basic version and the partition version where the first uses a +single run while the latter will group many runs together. This grouping is +defined in the ``cal_grouping.yaml`` file in the `legend-datasets +`_ repository. -Each rule has specified its inputs and outputs along with how to generate which can be -a shell command or a call to a python function. These scripts are stored in the ``scripts``` directory. -Additional parameters can also be defined. -Full details can be found at `snakemake https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html)`_. +Each rule has specified its inputs and outputs along with how to generate which +can be a shell command or a call to a python function. These scripts are stored +in the ``scripts``` directory. Additional parameters can also be defined. +Full details can be found at `snakemake +`_. diff --git a/docs/source/index.rst b/docs/source/index.rst index 8534e71..fdf8cad 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,16 +1,18 @@ -Welcome to legend-dataflow's documentation! -================================== +legend-dataflow +=============== -*legend-dataflow* is a Python package based on Snakemake ``_ -for running the data production of LEGEND. -It is designed to calibrate and optimise hundreds of channels in parallel before -bringing them all together to process the data. It takes as an input the metadata -at `legend metadata `_. +*legend-dataflow* is a Python package based on Snakemake +``_ for running the data +production of LEGEND. It is designed to calibrate and optimise hundreds of +channels in parallel before bringing them all together to process the data. It +takes as an input the metadata at `legend metadata +`_. Getting started --------------- -It is recommended to install and use the package through the `legend-prodenv `_. +It is recommended to install and use the package through the `legend-prodenv +`_. Next steps ---------- @@ -23,7 +25,7 @@ Next steps .. toctree:: :maxdepth: 1 - tutorials + user_manual .. toctree:: :maxdepth: 1 @@ -38,4 +40,5 @@ Next steps :maxdepth: 1 :caption: Development + developer Source Code diff --git a/docs/source/user_manual.rst b/docs/source/user_manual.rst index fb3e81b..90f4557 100644 --- a/docs/source/user_manual.rst +++ b/docs/source/user_manual.rst @@ -1,3 +1,6 @@ +User Manual +----------- + Configuration ============= @@ -13,33 +16,38 @@ the default path to the config file is ``./config.json``. Profiles ======== -A number of profiles are also included in the ``profiles`` directory. If none are specified, -the default profile is used. The profile can be specified by using the ``--profile`` option -when running Snakemake. These control how many jobs are run simultaneously, based on how many cores -are specified and the memory constraints of the system. A full list of all the options -that can be specified to snakemake can be found at `snakemake `_. +A number of profiles are also included in the ``profiles`` directory. If none +are specified, the default profile is used. The profile can be specified by +using the ``--profile`` option when running Snakemake. These control how many +jobs are run simultaneously, based on how many cores are specified and the +memory constraints of the system. A full list of all the options that can be +specified to snakemake can be found at `snakemake +`_. 
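+
+For example, a profile can be selected explicitly when invoking snakemake
+(illustrative command, run from the production cycle directory; the target
+syntax is described in the sections below):
+
+```shell
+$ snakemake --profile profiles/legend-data all-l200-p03-r000-cal-dsp.gen
+```
+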
Running the Dataflow ==================== -To run the dataflow at the most basic level all that is necassary is to tell snakemake the target file -generation. In a simple case this may just be a single file e.g. +To run the dataflow at the most basic level all that is necassary is to tell +snakemake the target file generation. In a simple case this may just be a +single file e.g. ```shell $ snakemake /data2/public/prodenv/prod-blind/ref-v1.0.0/generated/tier/dsp/p03/r000/l200-p03-r000-cal-20230401T000000Z-tier_dsp.lh5 ``` -This would generate the file and all the files that are required to generate it. -In general though we want to generate a large number of files, and we can do this using the ``gen`` target. +This would generate the file and all the files that are required to generate +it. In general though we want to generate a large number of files, and we can +do this using the ``gen`` target. Main output generation ====================== -Usually, the main output will be determined by a file-list. -The special output target ``{label}-{tier}.gen`` is used to -generate all files that follow the label up to the specified tier. -The label is composed of the following parts: -- the filelist designator: in most cases this will be ``all``, but other options are specified in the ``runlists.yaml`` file -in the `legend-datasets `_ repository. +Usually, the main output will be determined by a file-list. The special output +target ``{label}-{tier}.gen`` is used to generate all files that follow the +label up to the specified tier. The label is composed of the following parts: + +- the filelist designator: in most cases this will be ``all``, but other + options are specified in the ``runlists.yaml`` file in the `legend-datasets + `_ repository. - experiment: the experiment name i.e. l200 - period: the period of the data e.g. p03 - run: the run number e.g. r000 @@ -47,19 +55,20 @@ in the `legend-datasets `_ reposi - timestamp: the timestamp of the data e.g. 20230401T000000Z Example: + ```shell $ snakemake all-l200-p03-r001-cal-20230401T000000Z-dsp.gen ``` -You can specify as many or as few of these as they like e.g. ``all-l200-p03-dsp.gen`` -If you want to specify a lower part of the label but leave a higher part free, -you can use the ``*``` character e.g. ``all-l200-p03-*-cal-dsp.gen`` . -Additionally if you want to specify multiple options for a part of the label you can use the ``_`` character between -e.g. ``all-l200-p03-r000_r001-dsp.gen``. +You can specify as many or as few of these as they like e.g. +``all-l200-p03-dsp.gen`` If you want to specify a lower part of the label but +leave a higher part free, you can use the ``*``` character e.g. +``all-l200-p03-*-cal-dsp.gen`` . Additionally if you want to specify multiple +options for a part of the label you can use the ``_`` character between e.g. +``all-l200-p03-r000_r001-dsp.gen``. -After the files -are created, the empty file ``{label}-{tier}.gen```` will be created to -mark the successful data production. +After the files are created, the empty file ``{label}-{tier}.gen```` will be +created to mark the successful data production. 
Monitoring From 4c6dffccf9c86362ff7f5069a2248eaa6d5e2311 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 26 Dec 2024 21:23:17 +0100 Subject: [PATCH 019/101] update blinding cal to new hpgecal --- scripts/blinding_calibration.py | 44 ++++++++++++++------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/scripts/blinding_calibration.py b/scripts/blinding_calibration.py index 62207e9..072e756 100644 --- a/scripts/blinding_calibration.py +++ b/scripts/blinding_calibration.py @@ -15,21 +15,25 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo import lh5 -from pygama.math.histogram import better_int_binning, get_hist -from pygama.pargen.energy_cal import hpge_find_E_peaks +from pygama.pargen.energy_cal import HPGeCalibration mpl.use("agg") argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="files", nargs="*", type=str) + argparser.add_argument("--blind_curve", help="blind_curve", type=str) argparser.add_argument("--plot_file", help="out plot path", type=str) + argparser.add_argument("--meta", help="meta", type=str) +argparser.add_argument("--configs", help="configs", type=str) +argparser.add_argument("--log", help="log", type=str) + argparser.add_argument("--timestamp", help="timestamp", type=str) argparser.add_argument("--datatype", help="datatype", type=str) argparser.add_argument("--channel", help="channel", type=str) -argparser.add_argument("--configs", help="configs", type=str) -argparser.add_argument("--log", help="log", type=str) + +argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -56,15 +60,19 @@ dEuc = 1 / guess_keV # daqenergy is an int so use integer binning (dx used to be bugged as output so switched to nbins) -Euc_min, Euc_max, nbins = better_int_binning( - x_lo=Euc_min, x_hi=Euc_max, n_bins=(Euc_max - Euc_min) / dEuc + + +hpge_cal = HPGeCalibration( + "daqenergy", + peaks_keV, + guess_keV, + 0, + uncal_is_int=True, + debug_mode=args.debug, ) -hist, bins, var = get_hist(E_uncal, range=(Euc_min, Euc_max), bins=nbins) # Run the rough peak search -detected_peaks_locs, detected_peaks_keV, roughpars = hpge_find_E_peaks( - hist, bins, var, peaks_keV, n_sigma=5, deg=0 -) +detected_peaks_locs, detected_peaks_keV, roughpars = hpge_cal.hpge_find_E_peaks(E_uncal) log.info(f"{len(detected_peaks_locs)} peaks found:") log.info("\t Energy | Position ") @@ -98,20 +106,4 @@ pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) plt.close() -# else: -# out_dict = { -# "pars": { -# "operations": { -# "daqenergy_cal": { -# "expression": "daqenergy*a", -# "parameters": {"a": np.nan}, -# } -# } -# } -# } -# fig = plt.figure(figsize=(8, 10)) -# plt.suptitle(f"{args.channel}-blind_off") -# with open(args.plot_file, "wb") as w: -# pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) -# plt.close() Props.write_to_file(args.blind_curve, out_dict) From 08e20e7077016ab6265b6b1aeb99397ad99e6942 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 27 Dec 2024 18:36:28 +0100 Subject: [PATCH 020/101] Try fixing RTD build --- .readthedocs.yaml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index afc42e1..4612bfd 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,19 +1,23 @@ version: 2 -sphinx: - configuration: docs/source/conf.py - build: os: "ubuntu-22.04" tools: python: "3.12" commands: # FIXME: dependencies 
should not be explicitly listed here! - - pip install snakemake pygama pylegendmeta sphinx-inline-tabs sphinx-copybutton furo myst-parser + - asdf plugin add uv + - asdf install uv latest + - asdf global uv latest + - uv venv + - uv pip install + snakemake pygama pylegendmeta sphinx-inline-tabs sphinx-copybutton furo myst-parser - rm -rf docs/source/api - - sphinx-apidoc + - .venv/bin/python -m sphinx.ext.apidoc --private --module-first --force --output-dir docs/source/api scripts + - .venv/bin/python -m sphinx -T -b html -d docs/_build/doctrees -D + language=en docs/source $READTHEDOCS_OUTPUT/html From 603f3ecbd14de0579420a262bcc5edd574af1204 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 27 Dec 2024 18:44:52 +0100 Subject: [PATCH 021/101] Bug fix --- .gitignore | 2 +- docs/Makefile | 8 +++++++- docs/source/conf.py | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 90d9198..4eb2181 100644 --- a/.gitignore +++ b/.gitignore @@ -77,7 +77,7 @@ instance/ .scrapy # Sphinx documentation -/docs/build/ +/docs/_build/ /docs/source/generated # PyBuilder diff --git a/docs/Makefile b/docs/Makefile index 9be493d..ff41907 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -3,7 +3,13 @@ SOURCEDIR = source BUILDDIR = build all: apidoc - sphinx-build -M html "$(SOURCEDIR)" "$(BUILDDIR)" -W --keep-going + sphinx-build \ + -T \ + -b html \ + -d "$(BUILDDIR)"/doctrees \ + -D language=en \ + -W --keep-going \ + "$(SOURCEDIR)" "$(BUILDDIR)" apidoc: clean-apidoc sphinx-apidoc \ diff --git a/docs/source/conf.py b/docs/source/conf.py index 013e65b..dfb1a23 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -4,7 +4,7 @@ import sys from pathlib import Path -sys.path.insert(0, Path(__file__).parents[2].resolve().as_posix() / "scripts") +sys.path.insert(0, Path(__file__).parents[2].resolve().as_posix()) project = "legend-dataflow" copyright = "2024, the LEGEND Collaboration" From 9f4d1c274102e8a5ab8f51a14a0c48dbec8d226b Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 27 Dec 2024 18:46:11 +0100 Subject: [PATCH 022/101] Remove unneeded sphinx ext --- docs/source/conf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index dfb1a23..92ee6c2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -17,7 +17,6 @@ "sphinx_copybutton", "sphinx_inline_tabs", "myst_parser", - "IPython.sphinxext.ipython_console_highlighting", ] source_suffix = { From 1152316bff97c4ff56d0a4624a1a39586d86ecfa Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 28 Dec 2024 17:09:44 +0100 Subject: [PATCH 023/101] add snakefile to profile --- profiles/build-raw/config.yaml | 1 + profiles/default/config.yaml | 1 + profiles/legend-data/config.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/profiles/build-raw/config.yaml b/profiles/build-raw/config.yaml index 32a0814..4525deb 100644 --- a/profiles/build-raw/config.yaml +++ b/profiles/build-raw/config.yaml @@ -4,5 +4,6 @@ max-jobs-per-second: 1 resources: - mem_swap=3500 configfile: config.json +snakefile: ./workflow/Snakefile-build-raw keep-going: true rerun-incomplete: true diff --git a/profiles/default/config.yaml b/profiles/default/config.yaml index 6b7ddb0..53a11cd 100644 --- a/profiles/default/config.yaml +++ b/profiles/default/config.yaml @@ -1,4 +1,5 @@ cores: all configfile: config.json +snakefile: ./workflow/Snakefile keep-going: true rerun-incomplete: true diff --git a/profiles/legend-data/config.yaml b/profiles/legend-data/config.yaml index 782e4df..364bdb1 
100644 --- a/profiles/legend-data/config.yaml +++ b/profiles/legend-data/config.yaml @@ -4,5 +4,6 @@ max-jobs-per-second: 1 resources: - mem_swap=3500 configfile: config.json +snakefile: ./workflow/Snakefile keep-going: true rerun-incomplete: true From 24fb2ed6907c2b66abd68822a0c977ed200b7b0c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 28 Dec 2024 17:10:18 +0100 Subject: [PATCH 024/101] add table format to config --- templates/config.json | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/templates/config.json b/templates/config.json index d8189ee..0d801ba 100644 --- a/templates/config.json +++ b/templates/config.json @@ -50,6 +50,18 @@ "cache": "$_/software/python/cache" }, + "table_format": { + "raw": "ch{ch:07d}/raw", + "dsp": "ch{ch:07d}/dsp", + "psp": "ch{ch:07d}/dsp", + "hit": "ch{ch:07d}/hit", + "pht": "ch{ch:07d}/hit", + "evt": "{grp}/evt", + "pet": "{grp}/evt", + "skm": "{grp}/skm", + "tcm": "hardware_tcm_1" + }, + "execenv": { "cmd": "apptainer run", "arg": "/data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif", From c89b634fba5cc0bd42d03a9cac2e54933f19ac9e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 28 Dec 2024 17:11:07 +0100 Subject: [PATCH 025/101] update to cal_groupings file --- Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index 0174479..fd14ffb 100644 --- a/Snakefile +++ b/Snakefile @@ -46,7 +46,7 @@ chan_maps = chan_map_path(setup) meta = metadata_path(setup) det_status = det_status_path(setup) swenv = runcmd(setup) -part = ds.CalGrouping(setup, Path(det_status) / "cal_partitions.yaml") +part = ds.CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") basedir = workflow.basedir From 83fc32991810e4f3c47aa4857d420298aee17054 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 28 Dec 2024 20:13:11 +0100 Subject: [PATCH 026/101] add pyproject file --- .readthedocs.yaml | 3 +-- LICENSE.md | 4 ++- pyproject.toml | 67 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 pyproject.toml diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 4612bfd..ca8910f 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -10,8 +10,7 @@ build: - asdf install uv latest - asdf global uv latest - uv venv - - uv pip install - snakemake pygama pylegendmeta sphinx-inline-tabs sphinx-copybutton furo myst-parser + - uv pip install .[docs] - rm -rf docs/source/api - .venv/bin/python -m sphinx.ext.apidoc --private diff --git a/LICENSE.md b/LICENSE.md index c4148f9..35d8ee3 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,9 +1,11 @@ -The legend-dataflow-hades package is licensed under the MIT "Expat" License: +The legend-dataflow package is licensed under the MIT "Expat" License: > Copyright (c) 2021: > > Matteo Agostini > Oliver Schulz +> George Marshall +> Luigi Pertoldi > > Permission is hereby granted, free of charge, to any person obtaining a copy > of this software and associated documentation files (the "Software"), to deal diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..53060c4 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,67 @@ +[tool.uv] +package = false + +[tool.uv.workspace] +exclude = ["rules", "templates", "scripts", "generated", "inputs", "software", "workflow"] + +[tool.setuptools] +py-modules = [] + +[project] +name = "legend-dataflow" +description = "Python package for processing L200 data" +authors = [ + {name = "George Marshall", email = "george.marshall.20@ucl.ac.uk"}, 
+ {name = "Luigi Pertoldi", email = "gipert@pm.me"}, + {name = "The Legend Collaboration"}, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT Expat License", + "Operating System :: MacOS", + "Operating System :: POSIX", + "Operating System :: Unix", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering", +] +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "dbetto>=1.0.5", + "snakemake>=8", +] +dynamic = [ + "version", +] + +[project.optional-dependencies] +no_container = [ + "pygama", + "dspeed", + "pylegendmeta", + "legend-pydataobj", + "legend-daq2lh5", +] +test = [ + "legend-dataflow[no_container]", + "pytest >=6", + "pytest-cov >=3", +] +dev = [ + "legend-dataflow[no_container]", + "pytest >=6", + "pytest-cov >=3", +] +docs = [ + "legend-dataflow[no_container]", + "sphinx>=7.0", + "myst_parser>=0.13", + "sphinx_inline_tabs", + "sphinx_copybutton", + "sphinx_autodoc_typehints", + "furo>=2023.08.17", +] From 7cd02734d919a2dcab5d8dc4d27e42f060147f9f Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Dec 2024 16:27:10 +0100 Subject: [PATCH 027/101] add logging config and cleanup config loading --- scripts/build_dsp.py | 22 ++++++++++----- scripts/build_evt.py | 41 +++++++++++++-------------- scripts/build_hit.py | 33 ++++++++++++++-------- scripts/build_raw.py | 20 ++++++++++++-- scripts/build_raw_blind.py | 35 +++++++++++++---------- scripts/build_skm.py | 35 +++++++++++++---------- scripts/build_tcm.py | 24 +++++++++------- scripts/check_blinding.py | 28 +++++++++++-------- scripts/pars_dsp_build_svm.py | 29 +++++++++++++++---- scripts/pars_dsp_dplms.py | 25 +++++++++++------ scripts/pars_dsp_eopt.py | 35 +++++++++++------------ scripts/pars_dsp_event_selection.py | 36 ++++++++++++------------ scripts/pars_dsp_nopt.py | 35 +++++++++++------------ scripts/pars_dsp_svm.py | 14 ---------- scripts/pars_dsp_tau.py | 26 ++++++++++------- scripts/pars_hit_aoe.py | 31 +++++++++++---------- scripts/pars_hit_ecal.py | 43 ++++++++++++++++------------- scripts/pars_hit_lq.py | 29 +++++++++++-------- scripts/pars_hit_qc.py | 29 +++++++++++-------- scripts/pars_pht_aoecal.py | 30 +++++++++++--------- scripts/pars_pht_fast.py | 34 +++++++++++++---------- scripts/pars_pht_lqcal.py | 30 +++++++++++--------- scripts/pars_pht_partcal.py | 30 +++++++++++--------- scripts/pars_pht_qc.py | 31 ++++++++++++--------- scripts/pars_pht_qc_phy.py | 30 ++++++++++++-------- scripts/pars_tcm_pulser.py | 32 ++++++++++++--------- 26 files changed, 454 insertions(+), 333 deletions(-) diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index c505058..f028ea6 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -52,17 +52,25 @@ def replace_list_with_array(dic): msg = f"Tier {args.tier} not supported" raise ValueError(msg) + +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, 
filemode="w") + log = logging.getLogger(__name__) + channel_dict = config_dict["inputs"]["processing_chain"] settings_dict = config_dict["options"].get("settings", {}) if isinstance(settings_dict, str): settings_dict = Props.read_from(settings_dict) -log_config = config_dict["options"]["logging"] - -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -log_config = Props.read_from(log_config) -log_config["handlers"]["file"]["filename"] = args.log -logging.config.dictConfig(log_config) -log = logging.getLogger("test") meta = LegendMetadata(path=args.metadata) chan_map = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index a02d9f8..89fd215 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -1,6 +1,7 @@ import argparse import json import logging +import logging.config import time from pathlib import Path @@ -38,43 +39,43 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): argparser.add_argument("--xtc_file", help="xtc file", type=str) argparser.add_argument("--par_files", help="par files", nargs="*") -argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--tier", help="Tier", type=str, required=True) +argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--output", help="output file", type=str) args = argparser.parse_args() -if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -else: - logging.basicConfig(level=logging.DEBUG) - -logging.getLogger("legendmeta").setLevel(logging.INFO) -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - # load in config configs = TextDB(args.configs, lazy=True) if args.tier in ("evt", "pet"): - config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_evt"][ - "inputs" - ] - evt_config_file = config_dict["evt_config"] + rule_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_evt"] + else: msg = "unknown tier" raise ValueError(msg) +config_dict = rule_dict["inputs"] +evt_config_file = config_dict["evt_config"] + +if "logging" in rule_dict["options"]: + log_config = rule_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(rule_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + meta = LegendMetadata(args.metadata, lazy=True) chmap = meta.channelmap(args.timestamp) diff --git a/scripts/build_hit.py b/scripts/build_hit.py index 3aba4aa..4daa2e5 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -1,5 +1,6 
@@ import argparse import logging +import logging.config import time from pathlib import Path @@ -24,24 +25,32 @@ argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - configs = TextDB(args.configs, lazy=True) if args.tier == "hit" or args.tier == "pht": - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_hit"][ - "inputs" - ]["hit_config"] + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_hit"] else: msg = "unknown tier" raise ValueError(msg) +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + +channel_dict = config_dict["inputs"]["hit_config"] +settings_dict = config_dict["options"].get("settings", {}) +if isinstance(settings_dict, str): + settings_dict = Props.read_from(settings_dict) + meta = LegendMetadata(path=args.metadata) chan_map = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/build_raw.py b/scripts/build_raw.py index 03a4fca..081768f 100644 --- a/scripts/build_raw.py +++ b/scripts/build_raw.py @@ -23,9 +23,23 @@ Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"][ - "inputs" -] +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] + +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + +channel_dict = config_dict["inputs"] settings = Props.read_from(channel_dict["settings"]) channel_dict = channel_dict["out_spec"] all_config = Props.read_from(channel_dict["gen_config"]) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 33a6c31..1405ecd 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -34,21 +34,26 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -Path(args.log).parent.makedir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") 
-logging.getLogger("lgdo").setLevel(logging.INFO) - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - configs = TextDB(args.configs, lazy=True) -channel_dict = configs.on(args.timestamp, system=args.datatype) - -hdf_settings = Props.read_from(channel_dict["snakemake_rules"]["tier_raw"]["inputs"]["settings"])[ - "hdf5_settings" -] -blinding_settings = Props.read_from( - channel_dict["snakemake_rules"]["tier_raw_blind"]["inputs"]["config"] -) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] + +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + +channel_dict = config_dict["inputs"] +hdf_settings = Props.read_from(config_dict["settings"])["hdf5_settings"] +blinding_settings = Props.read_from(config_dict["config"]) centroid = blinding_settings["centroid_in_keV"] # keV width = blinding_settings["width_in_keV"] # keV @@ -115,6 +120,7 @@ rng = np.random.default_rng() rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.output}.{rand_num}" +Path(temp_output).parent.mkdir(parents=True, exist_ok=True) for channel in all_channels: try: @@ -166,4 +172,5 @@ ) # rename the temp file +Path(args.output).parent.mkdir(parents=True, exist_ok=True) Path(temp_output).rename(args.output) diff --git a/scripts/build_skm.py b/scripts/build_skm.py index 10bf876..058025a 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -30,22 +30,27 @@ def get_all_out_fields(input_table, out_fields, current_field=""): argparser.add_argument("--output", help="output file", required=True) args = argparser.parse_args() -if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - # load in config -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -skm_config_file = configs["snakemake_rules"]["tier_skm"]["inputs"]["skm_config"] - +config_dict = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype)[ + "snakemake_rules" +]["tier_skm"] + +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + + +skm_config_file = config_dict["inputs"]["skm_config"] evt_filter = 
Props.read_from(skm_config_file)["evt_filter"] out_fields = Props.read_from(skm_config_file)["keep_fields"] diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index faa39d6..7f9c4a9 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -21,21 +21,25 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["tier_tcm"] -log_config = config_dict["options"]["logging"] - -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -log_config = Props.read_from(log_config) -log_config["handlers"]["file"]["filename"] = args.log -logging.config.dictConfig(log_config) -log = logging.getLogger("test") - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) settings = Props.read_from(config_dict["inputs"]["config"]) rng = np.random.default_rng() temp_output = f"{args.output}.{rng.integers(0, 99999):05d}" +Path(args.output).parent.mkdir(parents=True, exist_ok=True) # get the list of channels by fcid ch_list = lh5.ls(args.input, "/ch*") diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index 7d6da04..bf2ca93 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -15,16 +15,13 @@ import matplotlib.pyplot as plt import numexpr as ne import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 -from lgdo.utils import numba_defaults from pygama.math.histogram import get_hist from pygama.pargen.energy_cal import get_i_local_maxima mpl.use("Agg") -numba_defaults.cache = False -numba_defaults.boundscheck = False argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="files", nargs="*", type=str) @@ -39,14 +36,21 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -Path(args.log).parent.makedir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -log = logging.getLogger(__name__) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["tier_raw_blindcheck"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, 
filename=args.log, filemode="w") + log = logging.getLogger(__name__) # get the usability status for this channel chmap = LegendMetadata(args.metadata, lazy=True).channelmap(args.timestamp).map("daq.rawid") diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index 0d6ada7..67607bb 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -3,23 +3,40 @@ import pickle as pkl from pathlib import Path +from legendmeta import TextDB from legendmeta.catalog import Props from lgdo import lh5 from sklearn.svm import SVC argparser = argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) +argparser.add_argument("--configs", help="config file", type=str) + +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) +argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--output_file", help="output SVM file", type=str, required=True) argparser.add_argument("--train_data", help="input data file", type=str, required=True) argparser.add_argument("--train_hyperpars", help="input hyperparameter file", required=True) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) - -log = logging.getLogger(__name__) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) # Load files tb = lh5.read("ml_train/dsp", args.train_data) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 87403b8..8806dbd 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -6,7 +6,7 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import Array, Table from pygama.pargen.dplms_ge_dict import dplms_ge_dict @@ -31,14 +31,21 @@ args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + 
if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) log = logging.getLogger(__name__) sto = lh5.LH5Store() diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index d4f0098..9b4e092 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -10,7 +10,7 @@ import pygama.pargen.energy_optimisation as om # noqa: F401 import sklearn.gaussian_process.kernels as ker from dspeed.units import unit_registry as ureg -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.math.distributions import hpge_peak from pygama.pargen.dsp_optimize import ( @@ -44,17 +44,22 @@ argparser.add_argument("--plot_save_path", help="plot_save_path", type=str, required=False) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) - +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_eopt"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) -log = logging.getLogger(__name__) sto = lh5.LH5Store() t0 = time.time() @@ -62,12 +67,8 @@ channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -conf = LegendMetadata(path=args.configs) -configs = conf.on(args.timestamp, system=args.datatype) -dsp_config = configs["snakemake_rules"]["pars_dsp_eopt"]["inputs"]["processing_chain"][ - args.channel -] -opt_json = configs["snakemake_rules"]["pars_dsp_eopt"]["inputs"]["optimiser_config"][args.channel] +dsp_config = config_dict["inputs"]["processing_chain"][args.channel] +opt_json = config_dict["inputs"]["optimiser_config"][args.channel] opt_dict = Props.read_from(opt_json) db_dict = Props.read_from(args.decay_const) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index f4dfd7d..7cbabcc 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -11,7 +11,7 @@ import numpy as np import pygama.math.histogram as pgh import pygama.pargen.energy_cal as pgc -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from 
legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp @@ -96,16 +96,22 @@ def get_out_data( argparser.add_argument("--peak_file", help="peak_file", type=str, required=True) args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) - logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_peak_selection"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) - log = logging.getLogger(__name__) sto = lh5.LH5Store() t0 = time.time() @@ -113,14 +119,8 @@ def get_out_data( channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - conf = LegendMetadata(path=args.configs) - configs = conf.on(args.timestamp, system=args.datatype) - dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"][ - "processing_chain" - ][args.channel] - peak_json = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["peak_config"][ - args.channel - ] + dsp_config = config_dict["inputs"]["processing_chain"][args.channel] + peak_json = config_dict["inputs"]["peak_config"][args.channel] peak_dict = Props.read_from(peak_json) db_dict = Props.read_from(args.decay_const) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 5de3a59..9cc96e2 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -7,7 +7,7 @@ import lgdo.lh5 as lh5 import numpy as np import pygama.pargen.noise_optimization as pno -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp @@ -32,15 +32,21 @@ args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) -logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) - -log = logging.getLogger(__name__) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] +if "logging" in config_dict["options"]: + log_config = 
config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) t0 = time.time() @@ -49,15 +55,10 @@ channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -conf = LegendMetadata(path=args.configs) -configs = conf.on(args.timestamp, system=args.datatype) -dsp_config = configs["snakemake_rules"]["pars_dsp_nopt"]["inputs"]["processing_chain"][ - args.channel -] -opt_json = configs["snakemake_rules"]["pars_dsp_nopt"]["inputs"]["optimiser_config"][args.channel] +dsp_config = config_dict["inputs"]["processing_chain"][args.channel] +opt_json = config_dict["inputs"]["optimiser_config"][args.channel] opt_dict = Props.read_from(opt_json) - db_dict = Props.read_from(args.database) if opt_dict.pop("run_nopt") is True: diff --git a/scripts/pars_dsp_svm.py b/scripts/pars_dsp_svm.py index 370e320..359bc3f 100644 --- a/scripts/pars_dsp_svm.py +++ b/scripts/pars_dsp_svm.py @@ -1,5 +1,4 @@ import argparse -import logging from pathlib import Path from legendmeta.catalog import Props @@ -11,19 +10,6 @@ argparser.add_argument("--svm_file", help="svm file", required=True) args = argparser.parse_args() - -if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -else: - logging.basicConfig(level=logging.DEBUG) - -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - par_data = Props.read_from(args.input_file) file = f"'$_/{Path(args.svm_file).name}'" diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index b8d9a71..a3a3183 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -6,7 +6,7 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp @@ -32,15 +32,21 @@ sto = lh5.LH5Store() -configs = LegendMetadata(path=args.configs) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["pars_dsp_tau"] -log_config = config_dict["options"]["logging"] - -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -log_config = Props.read_from(log_config) -log_config["handlers"]["file"]["filename"] = args.log -logging.config.dictConfig(log_config) -log = logging.getLogger("test") +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = 
logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index c30c7ef..6924b39 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak @@ -17,7 +17,6 @@ from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -125,23 +124,27 @@ def aoe_calibration( argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_hit_aoecal"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -configs = LegendMetadata(path=args.configs) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_hit_aoecal" -]["inputs"]["aoecal_config"][args.channel] - +channel_dict = config_dict["inputs"]["aoecal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 43ba644..c16f75c 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -14,7 +14,7 @@ import numpy as np import pygama.math.distributions as pgf import pygama.math.histogram as pgh -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from matplotlib.colors import LogNorm from pygama.math.distributions import nb_poly @@ -443,13 +443,28 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, 
filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] + if args.tier == "hit": + config_dict = config_dict["pars_hit_ecal"] + elif args.tier == "pht": + config_dict = config_dict["pars_pht_ecal"] + else: + msg = "invalid tier" + raise ValueError(msg) + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) @@ -470,17 +485,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): hit_dict.update(database_dic[channel]["ctc_params"]) - # get metadata dictionary - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - if args.tier == "hit": - channel_dict = channel_dict["pars_hit_ecal"]["inputs"]["ecal_config"][args.channel] - elif args.tier == "pht": - channel_dict = channel_dict["pars_pht_ecal"]["inputs"]["ecal_config"][args.channel] - else: - msg = "invalid tier" - raise ValueError(msg) - + channel_dict = config_dict["inputs"]["ecal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) # convert plot functions from strings to functions and split off baseline and common plots diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 8625ed3..fbebbba 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 @@ -18,7 +18,6 @@ from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -144,22 +143,28 @@ def lq_calibration( argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_hit_lqcal"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = 
Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -configs = LegendMetadata(path=args.configs) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_hit_lqcal" -]["inputs"]["lqcal_config"][args.channel] +channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) ecal_dict = Props.read_from(args.ecal_file) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 320fee9..f0e681b 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -9,7 +9,7 @@ from pathlib import Path import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -53,23 +53,28 @@ argparser.add_argument("--save_path", help="save_path", type=str) args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_hit_qc"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" # get metadata dictionary - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - channel_dict = channel_dict["pars_hit_qc"]["inputs"]["qc_config"][args.channel] - + channel_dict = config_dict["inputs"]["qc_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) if args.overwrite_files: diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index ca938e5..74cf382 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -11,7 +11,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.AoE_cal 
import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak @@ -269,23 +269,27 @@ def eres_func(x): argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_aoecal"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_pht_aoecal" - ]["inputs"]["par_pht_aoecal_config"][args.channel] - + channel_dict = config_dict["inputs"]["par_pht_aoecal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) cal_dict = {} diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 104ad05..7f3a168 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pars_pht_aoecal import run_aoe_calibration from pars_pht_lqcal import run_lq_calibration @@ -18,7 +18,6 @@ from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -69,13 +68,21 @@ def run_splitter(files): argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"] + if "logging" in config_dict["pars_pht_partcal"]["options"]: + log_config = config_dict["pars_pht_partcal"]["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + 
log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["pars_pht_partcal"]["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) @@ -122,17 +129,14 @@ def run_splitter(files): timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(timestamp, system=args.datatype)["snakemake_rules"] - kwarg_dict = Props.read_from( - channel_dict["pars_pht_partcal"]["inputs"]["pars_pht_partcal_config"][args.channel] + config_dict["pars_pht_partcal"]["inputs"]["pars_pht_partcal_config"][args.channel] ) aoe_kwarg_dict = Props.read_from( - channel_dict["pars_pht_aoecal"]["inputs"]["par_pht_aoecal_config"][args.channel] + config_dict["pars_pht_aoecal"]["inputs"]["par_pht_aoecal_config"][args.channel] ) lq_kwarg_dict = Props.read_from( - channel_dict["pars_pht_lqcal"]["inputs"]["lqcal_config"][args.channel] + config_dict["pars_pht_lqcal"]["inputs"]["lqcal_config"][args.channel] ) params = [ diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 2c67745..862711b 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -10,7 +10,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 @@ -266,23 +266,27 @@ def eres_func(x): argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_lqcal"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_pht_lqcal" - ]["inputs"]["lqcal_config"][args.channel] - + channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) cal_dict = {} diff --git 
a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index a454d76..1fad3d3 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -12,7 +12,7 @@ import pandas as pd import pygama.math.distributions as pgf import pygama.math.histogram as pgh -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.math.distributions import nb_poly from pygama.pargen.data_cleaning import get_tcm_pulser_ids @@ -429,13 +429,21 @@ def calibrate_partition( argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_partcal"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) @@ -482,11 +490,7 @@ def calibrate_partition( timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(timestamp, system=args.datatype)["snakemake_rules"][ - "pars_pht_partcal" - ]["inputs"]["pars_pht_partcal_config"][args.channel] - + channel_dict = config_dict["inputs"]["pars_pht_partcal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) params = [ diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 495c87b..ac728d7 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -9,7 +9,7 @@ from pathlib import Path import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -57,22 +57,29 @@ ) args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_qc"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, 
exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" # get metadata dictionary - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - channel_dict = channel_dict["pars_pht_qc"]["inputs"]["qc_config"][args.channel] + channel_dict = config_dict["inputs"]["qc_config"][args.channel] + kwarg_dict = Props.read_from(channel_dict) # sort files in dictionary where keys are first timestamp from run if isinstance(args.cal_files, list): @@ -88,8 +95,6 @@ np.unique(cal_files) ) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also - kwarg_dict = Props.read_from(channel_dict) - if args.overwrite_files: overwrite = Props.read_from(args.overwrite_files) if channel in overwrite: diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 4f87afb..e308f5e 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -10,7 +10,7 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -45,22 +45,29 @@ ) args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_qc"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" # get metadata dictionary - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - channel_dict = channel_dict["pars_pht_qc"]["inputs"]["qc_config"][args.channel] + channel_dict = config_dict["qc_config"][args.channel] + kwarg_dict = Props.read_from(channel_dict) sto = lh5.LH5Store() @@ -90,7 +97,6 @@ puls = sto.read("ch1027201/dsp/", phy_files, field_mask=["trapTmax"])[0] bl_mask = ((bls["wf_max"].nda - 
bls["bl_mean"].nda) > 1000) & (puls["trapTmax"].nda < 200) - kwarg_dict = Props.read_from(channel_dict) kwarg_dict_fft = kwarg_dict["fft_fields"] cut_fields = get_keys( diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index 9e6ad42..018e386 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -4,7 +4,7 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import get_tcm_pulser_ids @@ -22,27 +22,33 @@ argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_tcm_pulser"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) sto = lh5.LH5Store() log = logging.getLogger(__name__) -configs = LegendMetadata(path=args.configs) -config_dict = configs.on(args.timestamp, system=args.datatype) -kwarg_dict = config_dict["snakemake_rules"]["pars_tcm_pulser"]["inputs"]["pulser_config"] + +kwarg_dict = config_dict["inputs"]["pulser_config"] +kwarg_dict = Props.read_from(kwarg_dict) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid}" -kwarg_dict = Props.read_from(kwarg_dict) - if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": tcm_files = args.tcm_files[0] with Path(tcm_files).open() as f: From 59e273b9fabdb4c51276ceeee4c34328a5481a0c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Dec 2024 16:28:25 +0100 Subject: [PATCH 028/101] add param info to svm rule --- rules/dsp.smk | 174 ++++++++++++++++++++++++++------------------------ rules/psp.smk | 8 +++ 2 files changed, 100 insertions(+), 82 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 7ae67a7..4d70945 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -234,6 +234,10 @@ rule build_svm_dsp: train_data=lambda wildcards: str( get_input_par_file(wildcards, "dsp", "svm_hyperpars") ).replace("hyperpars.yaml", "train.lh5"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: @@ -246,6 +250,10 @@ rule build_svm_dsp: "{swenv} python3 -B " "{basedir}/../scripts/pars_dsp_build_svm.py " "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " 
"--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " "--output_file {output.dsp_pars}" @@ -363,85 +371,87 @@ rule build_pars_dsp_db: "--channelmap {meta} " -# rule build_pars_dsp: -# input: -# in_files=lambda wildcards: get_par_chanlist( -# setup, -# f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", -# "dsp", -# basedir, -# det_status, -# chan_maps, -# name="dplms", -# extension="lh5", -# ), -# in_db=get_pattern_pars_tmp( -# setup, -# "dsp", -# datatype="cal", -# ), -# plts=get_pattern_plts(setup, "dsp"), -# objects=get_pattern_pars( -# setup, -# "dsp", -# name="objects", -# extension="dir", -# check_in_cycle=check_in_cycle, -# ), -# params: -# timestamp="{timestamp}", -# datatype="cal", -# output: -# out_file=get_pattern_pars( -# setup, -# "dsp", -# extension="lh5", -# check_in_cycle=check_in_cycle, -# ), -# out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), -# group: -# "merge-dsp" -# shell: -# "{swenv} python3 -B " -# "{basedir}/../scripts/merge_channels.py " -# "--output {output.out_file} " -# "--in_db {input.in_db} " -# "--out_db {output.out_db} " -# "--input {input.in_files} " -# "--timestamp {params.timestamp} " -# "--channelmap {meta} " -# rule build_dsp: -# input: -# raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), -# pars_file=ancient( -# lambda wildcards: ParsCatalog.get_par_file( -# setup, wildcards.timestamp, "dsp" -# ) -# ), -# params: -# timestamp="{timestamp}", -# datatype="{datatype}", -# ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, -# output: -# tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), -# db_file=get_pattern_pars_tmp(setup, "dsp_db"), -# log: -# get_pattern_log(setup, "tier_dsp"), -# group: -# "tier-dsp" -# resources: -# runtime=300, -# mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, -# shell: -# "{swenv} python3 -B " -# "{basedir}/../scripts/build_dsp.py " -# "--log {log} " -# "--tier dsp " -# f"--configs {ro(configs)} " -# "--metadata {meta} " -# "--datatype {params.datatype} " -# "--timestamp {params.timestamp} " -# "--input {params.ro_input[raw_file]} " -# "--output {output.tier_file} " -# "--db_file {output.db_file} " -# "--pars_file {params.ro_input[pars_file]} " +rule build_pars_dsp: + input: + in_files=lambda wildcards: get_par_chanlist( + setup, + f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", + "dsp", + basedir, + det_status, + chan_maps, + name="dplms", + extension="lh5", + ), + in_db=get_pattern_pars_tmp( + setup, + "dsp", + datatype="cal", + ), + plts=get_pattern_plts(setup, "dsp"), + objects=get_pattern_pars( + setup, + "dsp", + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + params: + timestamp="{timestamp}", + datatype="cal", + output: + out_file=get_pattern_pars( + setup, + "dsp", + extension="lh5", + check_in_cycle=check_in_cycle, + ), + out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), + group: + "merge-dsp" + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/merge_channels.py " + "--output {output.out_file} " + "--in_db {input.in_db} " + "--out_db {output.out_db} " + "--input {input.in_files} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " + + +rule build_dsp: + input: + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), + pars_file=ancient( + lambda wildcards: ParsCatalog.get_par_file( + setup, wildcards.timestamp, "dsp" + ) + ), + params: + 
timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, + output: + tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(setup, "dsp_db"), + log: + get_pattern_log(setup, "tier_dsp"), + group: + "tier-dsp" + resources: + runtime=300, + mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/build_dsp.py " + "--log {log} " + "--tier dsp " + f"--configs {ro(configs)} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--input {params.ro_input[raw_file]} " + "--output {output.tier_file} " + "--db_file {output.db_file} " + "--pars_file {params.ro_input[pars_file]} " diff --git a/rules/psp.smk b/rules/psp.smk index 9fc0861..5505f27 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -185,6 +185,10 @@ rule build_svm_psp: train_data=lambda wildcards: str( get_input_par_file(wildcards, "psp", "svm_hyperpars") ).replace("hyperpars.yaml", "train.lh5"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: @@ -197,6 +201,10 @@ rule build_svm_psp: "{swenv} python3 -B " "{basedir}/../scripts/pars_dsp_build_svm.py " "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " "--output_file {output.dsp_pars}" From 2cc123246f58eb9b06eeb37ad7eb2b31ee98bed2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 8 Jan 2025 17:04:34 +0100 Subject: [PATCH 029/101] move logging to function --- scripts/build_dsp.py | 18 ++---------------- scripts/build_evt.py | 21 ++++----------------- scripts/build_hit.py | 17 ++--------------- scripts/build_raw.py | 15 ++------------- scripts/build_raw_blind.py | 16 ++-------------- scripts/build_skm.py | 17 ++--------------- scripts/build_tcm.py | 18 +++--------------- scripts/check_blinding.py | 17 +++-------------- scripts/pars_dsp_build_svm.py | 19 +++---------------- scripts/pars_dsp_dplms.py | 22 ++++++---------------- scripts/pars_dsp_eopt.py | 17 +++-------------- scripts/pars_dsp_event_selection.py | 17 +++-------------- scripts/pars_dsp_nopt.py | 17 +++-------------- scripts/pars_dsp_tau.py | 18 +++--------------- scripts/pars_hit_aoe.py | 17 +++-------------- scripts/pars_hit_ecal.py | 16 +++------------- scripts/pars_hit_lq.py | 17 +++-------------- scripts/pars_hit_qc.py | 16 +++------------- scripts/pars_pht_aoecal.py | 16 +++------------- scripts/pars_pht_fast.py | 17 +++-------------- scripts/pars_pht_lqcal.py | 18 +++--------------- scripts/pars_pht_partcal.py | 18 +++--------------- scripts/pars_pht_qc.py | 16 +++------------- scripts/pars_pht_qc_phy.py | 16 +++------------- scripts/pars_tcm_pulser.py | 16 +++------------- scripts/util/log.py | 28 ++++++++++++++++++++++++++++ 26 files changed, 102 insertions(+), 358 deletions(-) create mode 100644 scripts/util/log.py diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index f028ea6..f7b4141 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -1,6 +1,4 @@ import argparse -import logging -import logging.config import re import time import warnings @@ -11,6 +9,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 +from utils.log import build_log def replace_list_with_array(dic): 
@@ -52,20 +51,7 @@ def replace_list_with_array(dic): msg = f"Tier {args.tier} not supported" raise ValueError(msg) - -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) channel_dict = config_dict["inputs"]["processing_chain"] settings_dict = config_dict["options"].get("settings", {}) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 89fd215..e56912b 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -1,16 +1,15 @@ import argparse import json -import logging -import logging.config import time from pathlib import Path import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import Props, TextDB +from legendmeta import LegendMetadata from lgdo.types import Array from pygama.evt import build_evt +from util.log import build_log sto = lh5.LH5Store() @@ -62,19 +61,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): config_dict = rule_dict["inputs"] evt_config_file = config_dict["evt_config"] -if "logging" in rule_dict["options"]: - log_config = rule_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(rule_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(rule_dict, args.log) meta = LegendMetadata(args.metadata, lazy=True) chmap = meta.channelmap(args.timestamp) diff --git a/scripts/build_hit.py b/scripts/build_hit.py index 4daa2e5..cec39b7 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -1,6 +1,4 @@ import argparse -import logging -import logging.config import time from pathlib import Path @@ -8,6 +6,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from pygama.hit.build_hit import build_hit +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) @@ -32,19 +31,7 @@ msg = "unknown tier" raise ValueError(msg) -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) channel_dict = 
config_dict["inputs"]["hit_config"] settings_dict = config_dict["options"].get("settings", {}) diff --git a/scripts/build_raw.py b/scripts/build_raw.py index 081768f..7e1dd1b 100644 --- a/scripts/build_raw.py +++ b/scripts/build_raw.py @@ -6,6 +6,7 @@ from daq2lh5 import build_raw from legendmeta import TextDB from legendmeta.catalog import Props +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) @@ -25,19 +26,7 @@ configs = TextDB(args.configs, lazy=True) config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) channel_dict = config_dict["inputs"] settings = Props.read_from(channel_dict["settings"]) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 1405ecd..0957c7a 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -11,7 +11,6 @@ """ import argparse -import logging from pathlib import Path import numexpr as ne @@ -19,6 +18,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) @@ -37,19 +37,7 @@ configs = TextDB(args.configs, lazy=True) config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) channel_dict = config_dict["inputs"] hdf_settings = Props.read_from(config_dict["settings"])["hdf5_settings"] diff --git a/scripts/build_skm.py b/scripts/build_skm.py index 058025a..c8ff972 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -1,12 +1,11 @@ import argparse -import logging -from pathlib import Path import awkward as ak from legendmeta import TextDB from legendmeta.catalog import Props from lgdo import lh5 from lgdo.types import Array, Struct, Table, VectorOfVectors +from utils.log import build_log def get_all_out_fields(input_table, out_fields, current_field=""): @@ -35,19 +34,7 @@ def get_all_out_fields(input_table, out_fields, current_field=""): "snakemake_rules" ]["tier_skm"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - 
Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) skm_config_file = config_dict["inputs"]["skm_config"] diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index 7f9c4a9..3ddf5dd 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -1,6 +1,4 @@ import argparse -import logging -import logging.config from pathlib import Path import lgdo.lh5 as lh5 @@ -9,6 +7,7 @@ from legendmeta import TextDB from legendmeta.catalog import Props from pygama.evt.build_tcm import build_tcm +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) @@ -21,19 +20,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["tier_tcm"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) settings = Props.read_from(config_dict["inputs"]["config"]) diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index bf2ca93..44261a5 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -7,7 +7,6 @@ """ import argparse -import logging import pickle as pkl from pathlib import Path @@ -20,6 +19,7 @@ from lgdo import lh5 from pygama.math.histogram import get_hist from pygama.pargen.energy_cal import get_i_local_maxima +from utils.log import build_log mpl.use("Agg") @@ -38,19 +38,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["tier_raw_blindcheck"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) # get the usability status for this channel chmap = LegendMetadata(args.metadata, lazy=True).channelmap(args.timestamp).map("daq.rawid") diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index 67607bb..a31a8c1 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -1,5 +1,4 @@ import argparse -import logging import pickle as pkl from pathlib import Path @@ -7,6 
+6,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from sklearn.svm import SVC +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) @@ -14,8 +14,6 @@ argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--output_file", help="output SVM file", type=str, required=True) argparser.add_argument("--train_data", help="input data file", type=str, required=True) @@ -24,19 +22,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) # Load files tb = lh5.read("ml_train/dsp", args.train_data) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 8806dbd..64c7a9f 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -10,6 +10,7 @@ from legendmeta.catalog import Props from lgdo import Array, Table from pygama.pargen.dplms_ge_dict import dplms_ge_dict +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) @@ -32,20 +33,9 @@ args = argparser.parse_args() configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +config_dict = configs["snakemake_rules"]["pars_dsp_dplms"] + +log = build_log(config_dict, args.log) log = logging.getLogger(__name__) sto = lh5.LH5Store() @@ -55,9 +45,9 @@ channel = f"ch{channel_dict[args.channel].daq.rawid:07}" configs = LegendMetadata(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -dsp_config = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["proc_chain"][args.channel] +dsp_config = config_dict["inputs"]["proc_chain"][args.channel] -dplms_json = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["dplms_pars"][args.channel] +dplms_json = config_dict["inputs"]["dplms_pars"][args.channel] dplms_dict = Props.read_from(dplms_json) db_dict = Props.read_from(args.database) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py 
index 9b4e092..5e9a009 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -1,5 +1,4 @@ import argparse -import logging import pickle as pkl import time import warnings @@ -18,6 +17,7 @@ run_bayesian_optimisation, run_one_dsp, ) +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -46,19 +46,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_eopt"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) sto = lh5.LH5Store() t0 = time.time() diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 7cbabcc..9999134 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -1,6 +1,5 @@ import argparse import json -import logging import time import warnings from bisect import bisect_left @@ -15,6 +14,7 @@ from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -98,19 +98,8 @@ def get_out_data( configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_peak_selection"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) sto = lh5.LH5Store() t0 = time.time() diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 9cc96e2..85883b8 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -1,5 +1,4 @@ import argparse -import logging import pickle as pkl import time from pathlib import Path @@ -11,6 +10,7 @@ from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp +from utils.log import build_log sto = lh5.LH5Store() @@ -34,19 +34,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - 
Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) t0 = time.time() diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index a3a3183..4f3cf9d 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -1,6 +1,4 @@ import argparse -import logging -import logging.config import pickle as pkl from pathlib import Path @@ -11,6 +9,7 @@ from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.extract_tau import ExtractTau +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) @@ -34,19 +33,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index 6924b39..4d3f503 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -1,7 +1,6 @@ from __future__ import annotations import argparse -import logging import pickle as pkl import warnings from pathlib import Path @@ -16,6 +15,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -126,19 +126,8 @@ def aoe_calibration( configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_hit_aoecal"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_hit_ecal.py 
b/scripts/pars_hit_ecal.py index c16f75c..aab5f41 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -23,6 +23,7 @@ from pygama.pargen.utils import load_data from scipy.stats import binned_statistic from util.convert_np import convert_dict_np_to_float +from utils.log import build_log log = logging.getLogger(__name__) mpl.use("agg") @@ -452,19 +453,8 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): else: msg = "invalid tier" raise ValueError(msg) - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index fbebbba..3487c38 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -1,7 +1,6 @@ from __future__ import annotations import argparse -import logging import pickle as pkl import warnings from pathlib import Path @@ -17,6 +16,7 @@ from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -145,19 +145,8 @@ def lq_calibration( configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_hit_lqcal"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index f0e681b..6b3369f 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -19,6 +19,7 @@ ) from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float +from utils.log import build_log log = logging.getLogger(__name__) @@ -55,19 +56,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_hit_qc"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = 
logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index 74cf382..91ae176 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -18,6 +18,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey +from utils.log import build_log log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -271,19 +272,8 @@ def eres_func(x): configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_aoecal"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 7f3a168..b8d48d2 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -2,7 +2,6 @@ import argparse import json -import logging import pickle as pkl import warnings from pathlib import Path @@ -17,6 +16,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -70,19 +70,8 @@ def run_splitter(files): configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"] - if "logging" in config_dict["pars_pht_partcal"]["options"]: - log_config = config_dict["pars_pht_partcal"]["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["pars_pht_partcal"]["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict["pars_pht_partcal"], args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 862711b..101acea 100644 --- 
a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -3,7 +3,6 @@ import argparse import copy import json -import logging import pickle as pkl import warnings from pathlib import Path @@ -19,8 +18,8 @@ from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey +from utils.log import build_log -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -268,19 +267,8 @@ def eres_func(x): configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_lqcal"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 1fad3d3..6eb25eb 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -2,7 +2,6 @@ import argparse import copy -import logging import pickle as pkl import re import warnings @@ -19,8 +18,8 @@ from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey +from utils.log import build_log -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -431,19 +430,8 @@ def calibrate_partition( configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_partcal"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index ac728d7..f3f634b 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -19,6 +19,7 @@ ) from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float +from utils.log import build_log log = logging.getLogger(__name__) @@ -59,19 +60,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_qc"] - if "logging" in config_dict["options"]: - 
log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index e308f5e..e642aa3 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -18,6 +18,7 @@ get_keys, ) from util.convert_np import convert_dict_np_to_float +from utils.log import build_log log = logging.getLogger(__name__) @@ -47,19 +48,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_qc"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index 018e386..4ae8843 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -7,6 +7,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) @@ -24,19 +25,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_tcm_pulser"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) sto = lh5.LH5Store() log = logging.getLogger(__name__) diff --git a/scripts/util/log.py b/scripts/util/log.py new file mode 100644 index 0000000..79b97c5 --- /dev/null +++ b/scripts/util/log.py @@ -0,0 +1,28 @@ +import logging +from logging.config import dictConfig +from pathlib import Path + +from dbetto import Props + + +def build_log(config_dict, log_file=None): 
+ if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if log_file is not None: + Path(log_file).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["dynamic"] = { + "class": "logging.FileHandler", + "level": "DEBUG", + "formatter": "simple", + "filename": log_file, + "mode": "a", + } + dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if log_file is not None: + Path(log_file).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=log_file, filemode="w") + log = logging.getLogger(__name__) + return log From 72140e2b6eca848fbd909cc1e8c65943a89245ed Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 8 Jan 2025 17:04:54 +0100 Subject: [PATCH 030/101] fix svm rules --- Snakefile | 18 +++++++++--------- rules/dsp.smk | 2 -- rules/psp.smk | 2 -- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/Snakefile b/Snakefile index fd14ffb..0d8dc94 100644 --- a/Snakefile +++ b/Snakefile @@ -134,15 +134,15 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # # remove filelists - # files = glob.glob(os.path.join(filelist_path(setup), "*")) - # for file in files: - # if os.path.isfile(file): - # os.remove(file) - # if os.path.exists(filelist_path(setup)): - # os.rmdir(filelist_path(setup)) - - # remove logs + # remove filelists + files = glob.glob(os.path.join(filelist_path(setup), "*")) + for file in files: + if os.path.isfile(file): + os.remove(file) + if os.path.exists(filelist_path(setup)): + os.rmdir(filelist_path(setup)) + + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): diff --git a/rules/dsp.smk b/rules/dsp.smk index 4d70945..66a18c8 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -237,7 +237,6 @@ rule build_svm_dsp: params: timestamp="{timestamp}", datatype="cal", - channel="{channel}", output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: @@ -253,7 +252,6 @@ rule build_svm_dsp: "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--channel {params.channel} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " "--output_file {output.dsp_pars}" diff --git a/rules/psp.smk b/rules/psp.smk index 5505f27..bde834d 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -188,7 +188,6 @@ rule build_svm_psp: params: timestamp="{timestamp}", datatype="cal", - channel="{channel}", output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: @@ -204,7 +203,6 @@ rule build_svm_psp: "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--channel {params.channel} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " "--output_file {output.dsp_pars}" From 5139f183695a2377cd8d94b3fa12e68c58060227 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 8 Jan 2025 18:45:57 +0100 Subject: [PATCH 031/101] add dbetto dependency to configs --- templates/config-nersc.json | 1 + templates/config.json | 1 + 2 files changed, 2 insertions(+) diff --git a/templates/config-nersc.json b/templates/config-nersc.json index 5d0c927..9df4fe7 100644 --- a/templates/config-nersc.json +++ b/templates/config-nersc.json @@ -74,6 +74,7 @@ "pkg_versions": { "pygama": "pygama==2.0.3", "pylegendmeta": "pylegendmeta==0.10.2", + "dbetto": "dbetto==1.0.6", "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.10.0", 
"legend-daq2lh5": "legend-daq2lh5==1.2.1", diff --git a/templates/config.json b/templates/config.json index 0d801ba..17f4bbf 100644 --- a/templates/config.json +++ b/templates/config.json @@ -76,6 +76,7 @@ "pkg_versions": { "pygama": "pygama==2.0.3", "pylegendmeta": "pylegendmeta==1.1.0", + "dbetto": "dbetto==1.0.6", "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.9.0", "legend-daq2lh5": "legend-daq2lh5==1.2.2", From 4dea2743a895cd904f02799b7ef77b45402cfc19 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 17 Jan 2025 16:26:03 +0100 Subject: [PATCH 032/101] Fix bugs in complete_run.py --- scripts/complete_run.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/complete_run.py b/scripts/complete_run.py index fe800e8..e4c5eb3 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -131,8 +131,9 @@ def get_run(Filekey): return key_dict -def build_valid_keys(input_files, output_dir): - infiles = Path(as_ro(input_files)).glob() +def build_valid_keys(input_files_regex, output_dir): + in_regex = Path(as_ro(input_files_regex)) + infiles = in_regex.parent.glob(in_regex.name) key_dict = get_keys(infiles) for key in list(key_dict): @@ -254,9 +255,8 @@ def tdirs(tier): def fformat(tier): - return as_ro( - patterns.get_pattern_tier(snakemake.params.setup, tier, check_in_cycle=False) - ).replace(as_ro(ut.get_tier_path(snakemake.params.setup, tier)), "") + abs_path = patterns.get_pattern_tier(snakemake.params.setup, tier, check_in_cycle=False) + return str(abs_path).replace(ut.get_tier_path(snakemake.params.setup, tier), "") file_db_config |= { @@ -267,7 +267,7 @@ def fformat(tier): if snakemake.wildcards.tier != "daq": print(f"INFO: ...building FileDBs with {snakemake.threads} threads") - Path(snakemake.params.filedb_path).parent.makedirs(parents=True, exist_ok=True) + Path(snakemake.params.filedb_path).mkdir(parents=True, exist_ok=True) with (Path(snakemake.params.filedb_path) / "file_db_config.json").open("w") as f: json.dump(file_db_config, f, indent=2) From 0c4392440fec4ab3b40b807613aa3acfe94430c3 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 17 Jan 2025 16:35:46 +0100 Subject: [PATCH 033/101] Support using specialized build_raw script depending on DAQ extension --- Snakefile | 1 - rules/common.smk | 4 +-- rules/raw.smk | 65 +++++++++++++++++++++------------------- scripts/util/patterns.py | 11 ++++--- 4 files changed, 42 insertions(+), 39 deletions(-) diff --git a/Snakefile b/Snakefile index 0d8dc94..3a44ece 100644 --- a/Snakefile +++ b/Snakefile @@ -12,7 +12,6 @@ This includes: from pathlib import Path import os -import json import sys import glob from datetime import datetime diff --git a/rules/common.smk b/rules/common.smk index 6ba4654..ad1d7c2 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -114,8 +114,8 @@ def get_tier_pattern(tier): This func gets the search pattern for the relevant tier passed. 
""" if tier == "daq": - return get_pattern_unsorted_data(setup) + return get_pattern_unsorted_data(setup, extension="*") elif tier == "raw": - return get_pattern_tier_daq(setup) + return get_pattern_tier_daq(setup, extension="*") else: return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/raw.smk b/rules/raw.smk index 8239519..59054ce 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -4,36 +4,41 @@ from scripts.util.patterns import ( get_pattern_log, get_pattern_tier_raw_blind, ) +from scripts.util.utils import set_last_rule_name -rule build_raw: - """ - This rule runs build raw, it takes in a daq file and outputs a raw file - """ - input: - get_pattern_tier_daq(setup), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: ro(input), - output: - get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), - log: - get_pattern_log(setup, "tier_raw"), - group: - "tier-raw" - resources: - mem_swap=110, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_raw.py " - "--log {log} " - f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "{params.ro_input} {output}" +for daq_ext in ("orca", "fcio"): + + rule: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension=daq_ext), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_{daq_ext}.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" + + set_last_rule_name(workflow, f"build_raw_{daq_ext}") rule build_raw_blind: @@ -42,7 +47,7 @@ rule build_raw_blind: and runs only if the blinding check file is on disk. Output is just the blinded raw file. """ input: - tier_file=get_pattern_tier(setup, "raw", check_in_cycle=False).replace( + tier_file=str(get_pattern_tier(setup, "raw", check_in_cycle=False)).replace( "{datatype}", "phy" ), blind_file=get_blinding_curve_file, @@ -53,7 +58,7 @@ rule build_raw_blind: output: get_pattern_tier_raw_blind(setup), log: - get_pattern_log(setup, "tier_raw_blind").replace("{datatype}", "phy"), + str(get_pattern_log(setup, "tier_raw_blind")).replace("{datatype}", "phy"), group: "tier-raw" resources: diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 1bfc9f7..e44aa33 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -53,23 +53,22 @@ def full_channel_pattern_with_extension(): return "{experiment}-{period}-{run}-{datatype}-{timestamp}-{channel}-{processing_step}.{ext}" -def get_pattern_unsorted_data(setup): +def get_pattern_unsorted_data(setup, extension="orca"): if sandbox_path(setup) is not None: - return ( - Path(f"{sandbox_path(setup)}") - / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" + return Path(f"{sandbox_path(setup)}") / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}." 
+ extension ) else: return None -def get_pattern_tier_daq(setup): +def get_pattern_tier_daq(setup, extension="orca"): return ( Path(f"{tier_daq_path(setup)}") / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" + / ("{experiment}-{period}-{run}-{datatype}-{timestamp}." + extension) ) From 8eba704089dee0d8de5dd8f260be3c9103ee1263 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 17 Jan 2025 16:36:38 +0100 Subject: [PATCH 034/101] Updates to build_raw Snakefile to support latest dataflow changes Commented broken pars catalog stuff for now --- Snakefile-build-raw | 69 +++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 98bd579..95d4a87 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -6,11 +6,10 @@ to the blinded raw data. It handles: - blinding the physics data """ -import pathlib, os, json, sys +import pathlib, os, sys from scripts.util.patterns import ( get_pattern_unsorted_data, get_pattern_tier_daq, - get_pattern_tier_raw, ) from scripts.util.utils import ( subst_vars_in_snakemake_config, @@ -20,8 +19,8 @@ from scripts.util.utils import ( filelist_path, pars_path, metadata_path, + det_status_path, ) -from scripts.util.pars_loading import pars_catalog import scripts.util as ds check_in_cycle = True @@ -36,16 +35,17 @@ configs = config_path(setup) chan_maps = chan_map_path(setup) swenv = runcmd(setup) meta = metadata_path(setup) +det_status = det_status_path(setup) basedir = workflow.basedir wildcard_constraints: - experiment="\w+", - period="p\d{2}", - run="r\d{3}", - datatype="\w{3}", - timestamp="\d{8}T\d{6}Z", + experiment=r"\w+", + period=r"p\d{2}", + run=r"r\d{3}", + datatype=r"\w{3}", + timestamp=r"\d{8}T\d{6}Z", localrules: @@ -53,25 +53,26 @@ localrules: autogen_output, -raw_par_catalog = ds.pars_key_resolve.get_par_catalog( - ["-*-*-*-cal"], - [ - get_pattern_unsorted_data(setup), - get_pattern_tier_daq(setup), - get_pattern_tier_raw(setup), - ], - {"cal": ["par_raw"]}, -) +# raw_par_catalog = ds.pars_key_resolve.get_par_catalog( +# ["-*-*-*-cal"], +# [ +# get_pattern_unsorted_data(setup), +# get_pattern_tier_daq(setup), +# get_pattern_tier(setup, "raw"), +# ], +# {"cal": ["par_raw"]}, +# ) onstart: print("Starting workflow") - raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") - if os.path.isfile(raw_par_cat_file): - os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) - pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(raw_par_catalog, raw_par_cat_file) + # raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") + # if os.path.isfile(raw_par_cat_file): + # os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) + # pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir(parents=True, exist_ok=True) + # ds.pars_key_resolve.write_to_jsonl(raw_par_catalog, raw_par_cat_file) + onsuccess: @@ -88,20 +89,29 @@ include: "rules/blinding_check.smk" rule gen_filelist: - """ - Generate file list. + """Generate file list. + + It is a checkpoint so when it is run it will update the dag passed on the + files it finds as an output. It does this by taking in the search pattern, + using this to find all the files that match this pattern, deriving the keys + from the files found and generating the list of new files needed. 
""" input: lambda wildcards: get_filelist( wildcards, setup, - get_tier_pattern(wildcards.tier), - ignore_keys_file=os.path.join(configs, "empty_keys.keylist"), - analysis_runs_file=None, + get_pattern_tier(setup, "raw", check_in_cycle=False), + ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", + analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - os.path.join(filelist_path(setup), "{label}-{tier}.filelist"), + temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), run: + if len(input) == 0: + print( + f"WARNING: No files found for the given pattern:{wildcards.label}", + "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen", + ) with open(output[0], "w") as f: for fn in input: f.write(f"{fn}\n") @@ -118,3 +128,6 @@ rule sort_data: get_pattern_tier_daq(setup), shell: "mv {input} {output}" + + +# vim: ft=snakemake From e565e59f2b596508475fb3076baa0c87e0614374 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 17 Jan 2025 18:16:02 +0100 Subject: [PATCH 035/101] extension="*" does not work as expected, needs to be fixed in some other way --- Snakefile | 14 ++++++++------ Snakefile-build-raw | 21 +++++++++------------ rules/common.smk | 12 ------------ rules/filelist_gen.smk | 8 +++++--- 4 files changed, 22 insertions(+), 33 deletions(-) diff --git a/Snakefile b/Snakefile index 3a44ece..3a66e0a 100644 --- a/Snakefile +++ b/Snakefile @@ -157,10 +157,10 @@ onsuccess: rule gen_filelist: """Generate file list. - It is a checkpoint so when it is run it will update the dag passed on the - files it finds as an output. It does this by taking in the search pattern, - using this to find all the files that match this pattern, deriving the keys - from the files found and generating the list of new files needed. + This rule is used as a "checkpoint", so when it is run it will update the + DAG based on the files it finds. It does this by taking in the search + pattern, using this to find all the files that match this pattern, deriving + the keys from the files found and generating the list of new files needed. """ input: lambda wildcards: get_filelist( @@ -173,10 +173,12 @@ rule gen_filelist: output: temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), run: + print(f"INFO: found {len(input)} files") if len(input) == 0: print( - f"WARNING: No files found for the given pattern:{wildcards.label}", - "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen", + f"WARNING: No files found for the given pattern:{wildcards.label}. " + "make sure pattern follows the format: " + "all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" ) with open(output[0], "w") as f: for fn in input: diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 95d4a87..ef05855 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -88,29 +88,26 @@ include: "rules/raw.smk" include: "rules/blinding_check.smk" +# FIXME: cannot put extension="*", otherwise it won't be possible to extract +# keys (see FileKey.get_path_from_filekey()) rule gen_filelist: - """Generate file list. - - It is a checkpoint so when it is run it will update the dag passed on the - files it finds as an output. It does this by taking in the search pattern, - using this to find all the files that match this pattern, deriving the keys - from the files found and generating the list of new files needed. 
- """ input: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_unsorted_data(setup, extension="fcio"), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), run: + print(f"INFO: found {len(input)} files") if len(input) == 0: print( - f"WARNING: No files found for the given pattern:{wildcards.label}", - "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen", + f"WARNING: no DAQ files found for the given pattern: {wildcards.label}. " + "make sure patterns follows the format: " + "all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" ) with open(output[0], "w") as f: for fn in input: @@ -123,9 +120,9 @@ rule sort_data: to the sorted dirs under generated """ input: - get_pattern_unsorted_data(setup), + get_pattern_unsorted_data(setup, extension="fcio"), output: - get_pattern_tier_daq(setup), + get_pattern_tier_daq(setup, extension="fcio"), shell: "mv {input} {output}" diff --git a/rules/common.smk b/rules/common.smk index ad1d7c2..1f09470 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -107,15 +107,3 @@ def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): raise ValueError(f"Could not find name in {pars_files_overwrite}") else: return out_files - - -def get_tier_pattern(tier): - """ - This func gets the search pattern for the relevant tier passed. - """ - if tier == "daq": - return get_pattern_unsorted_data(setup, extension="*") - elif tier == "raw": - return get_pattern_tier_daq(setup, extension="*") - else: - return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index c90c570..e30b876 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -148,8 +148,9 @@ def build_filelist( analysis_runs=None, ): """ - This function builds the filelist for the given filekeys, search pattern and tier. - It will ignore any keys in the ignore_keys list and only include the keys specified in the analysis_runs dict + This function builds the filelist for the given filekeys, search pattern + and tier. It will ignore any keys in the ignore_keys list and only include + the keys specified in the analysis_runs dict. 
""" fn_pattern = get_pattern(setup, tier) @@ -220,7 +221,8 @@ def get_filelist( wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): file_selection = wildcards.label.split("-", 1)[0] - keypart = f'-{wildcards.label.split("-",1)[1]}' # remove the file selection from the keypart + # remove the file selection from the keypart + keypart = f'-{wildcards.label.split("-",1)[1]}' analysis_runs, ignore_keys = get_analysis_runs( ignore_keys_file, analysis_runs_file, file_selection ) From 0be642ff57645491eb2d1724e3ddebb9a562d034 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Sat, 18 Jan 2025 17:52:53 +0100 Subject: [PATCH 036/101] Renaming, JIT compile daq2lh5 onstart --- Snakefile-build-raw | 16 ++-- rules/common.smk | 4 - scripts/build_raw_fcio.py | 89 +++++++++++++++++++++ scripts/{build_raw.py => build_raw_orca.py} | 0 scripts/util/patterns.py | 2 +- 5 files changed, 98 insertions(+), 13 deletions(-) create mode 100644 scripts/build_raw_fcio.py rename scripts/{build_raw.py => build_raw_orca.py} (100%) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index ef05855..fd9e795 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -7,10 +7,7 @@ to the blinded raw data. It handles: """ import pathlib, os, sys -from scripts.util.patterns import ( - get_pattern_unsorted_data, - get_pattern_tier_daq, -) +from scripts.util import patterns as patt from scripts.util.utils import ( subst_vars_in_snakemake_config, runcmd, @@ -65,7 +62,10 @@ localrules: onstart: - print("Starting workflow") + print("INFO: starting workflow") + + # Make sure some packages are initialized before we begin to avoid race conditions + shell('{swenv} python3 -B -c "import daq2lh5 "') # raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") # if os.path.isfile(raw_par_cat_file): @@ -95,7 +95,7 @@ rule gen_filelist: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_unsorted_data(setup, extension="fcio"), + patt.get_pattern_tier_daq(setup, extension="fcio"), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), @@ -120,9 +120,9 @@ rule sort_data: to the sorted dirs under generated """ input: - get_pattern_unsorted_data(setup, extension="fcio"), + patt.get_pattern_tier_daq_unsorted(setup, extension="fcio"), output: - get_pattern_tier_daq(setup, extension="fcio"), + patt.get_pattern_tier_daq(setup, extension="fcio"), shell: "mv {input} {output}" diff --git a/rules/common.smk b/rules/common.smk index 1f09470..a259601 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -6,10 +6,6 @@ from pathlib import Path from scripts.util.patterns import ( par_overwrite_path, get_pars_path, - get_pattern_unsorted_data, - get_pattern_tier_daq, - get_pattern_tier, - get_pattern_plts_tmp_channel, ) from scripts.util import ProcessingFileKey from scripts.util.catalog import Catalog diff --git a/scripts/build_raw_fcio.py b/scripts/build_raw_fcio.py new file mode 100644 index 0000000..7f17329 --- /dev/null +++ b/scripts/build_raw_fcio.py @@ -0,0 +1,89 @@ +import argparse +import logging +from pathlib import Path + +import numpy as np +from daq2lh5 import build_raw +from legendmeta import TextDB +from legendmeta.catalog import Props +from utils.log import build_log + +argparser = argparse.ArgumentParser() +argparser.add_argument("input", help="input file", type=str) +argparser.add_argument("output", help="output file", type=str) +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) 
+argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--configs", help="config file", type=str) +argparser.add_argument("--chan_maps", help="chan map", type=str) +argparser.add_argument("--log", help="log file", type=str) +args = argparser.parse_args() + +Path(args.log).parent.makedir(parents=True, exist_ok=True) +logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + +Path(args.output).parent.mkdir(parents=True, exist_ok=True) + +configs = TextDB(args.configs, lazy=True) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] + +log = build_log(config_dict, args.log) + +channel_dict = config_dict["inputs"] +settings = Props.read_from(channel_dict["settings"]) +channel_dict = channel_dict["out_spec"] +all_config = Props.read_from(channel_dict["gen_config"]) + +chmap = TextDB(args.chan_maps, lazy=True).channelmaps.on(args.timestamp).group("system") + +if "geds_config" in list(channel_dict): + ged_config = Props.read_from(channel_dict["geds_config"]) + + ged_channels = list( + chmap.geds.map("daq.rawid") + ) + + ged_config[next(iter(ged_config))]["geds"]["key_list"] = sorted(ged_channels) + Props.add_to(all_config, ged_config) + +if "spms_config" in list(channel_dict): + spm_config = Props.read_from(channel_dict["spms_config"]) + + spm_channels = list( + chmap.spms.map("daq.rawid") + ) + + spm_config[next(iter(spm_config))]["spms"]["key_list"] = sorted(spm_channels) + Props.add_to(all_config, spm_config) + +if "auxs_config" in list(channel_dict): + aux_config = Props.read_from(channel_dict["auxs_config"]) + aux_channels = list( + chmap.auxs.map("daq.rawid") + ) + aux_channels += list( + chmap.puls.map("daq.rawid") + ) + aux_channels += list( + chmap.bsln.map("daq.rawid") + ) + top_key = next(iter(aux_config)) + aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted(aux_channels) + Props.add_to(all_config, aux_config) + +if "muon_config" in list(channel_dict): + muon_config = Props.read_from(channel_dict["muon_config"]) + muon_channels = list( + chmap.muon.map("daq.rawid") + ) + top_key = next(iter(muon_config)) + muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted(muon_channels) + Props.add_to(all_config, muon_config) + +rng = np.random.default_rng() +rand_num = f"{rng.integers(0,99999):05d}" +temp_output = f"{args.output}.{rand_num}" + +build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) + +# rename the temp file +Path(temp_output).rename(args.output) diff --git a/scripts/build_raw.py b/scripts/build_raw_orca.py similarity index 100% rename from scripts/build_raw.py rename to scripts/build_raw_orca.py diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index e44aa33..28d27db 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -53,7 +53,7 @@ def full_channel_pattern_with_extension(): return "{experiment}-{period}-{run}-{datatype}-{timestamp}-{channel}-{processing_step}.{ext}" -def get_pattern_unsorted_data(setup, extension="orca"): +def get_pattern_tier_daq_unsorted(setup, extension="orca"): if sandbox_path(setup) is not None: return Path(f"{sandbox_path(setup)}") / ( "{experiment}-{period}-{run}-{datatype}-{timestamp}." 
+ extension From 4dcd0d2ee04d954f4be68215282b686660aea770 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Mon, 20 Jan 2025 12:40:49 +0100 Subject: [PATCH 037/101] Several fixes to build_raw.py scripts --- scripts/build_raw_blind.py | 2 +- scripts/build_raw_fcio.py | 78 ++++++++++++++------------------------ scripts/build_raw_orca.py | 8 ++-- scripts/util/log.py | 2 +- 4 files changed, 34 insertions(+), 56 deletions(-) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 0957c7a..0b036dd 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -18,7 +18,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) diff --git a/scripts/build_raw_fcio.py b/scripts/build_raw_fcio.py index 7f17329..b4d2e22 100644 --- a/scripts/build_raw_fcio.py +++ b/scripts/build_raw_fcio.py @@ -1,12 +1,12 @@ import argparse -import logging +from copy import deepcopy from pathlib import Path import numpy as np from daq2lh5 import build_raw -from legendmeta import TextDB -from legendmeta.catalog import Props -from utils.log import build_log +from dbetto import TextDB +from dbetto.catalog import Props +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) @@ -18,66 +18,44 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -Path(args.log).parent.makedir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - Path(args.output).parent.mkdir(parents=True, exist_ok=True) -configs = TextDB(args.configs, lazy=True) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] +config_dict = ( + TextDB(args.configs, lazy=True) + .on(args.timestamp, system=args.datatype) + .snakemake_rules.tier_raw_fcio +) log = build_log(config_dict, args.log) -channel_dict = config_dict["inputs"] -settings = Props.read_from(channel_dict["settings"]) -channel_dict = channel_dict["out_spec"] -all_config = Props.read_from(channel_dict["gen_config"]) +channel_dict = config_dict.inputs +settings = Props.read_from(channel_dict.settings) +channel_dict = channel_dict.out_spec +all_config = Props.read_from(channel_dict.gen_config) chmap = TextDB(args.chan_maps, lazy=True).channelmaps.on(args.timestamp).group("system") -if "geds_config" in list(channel_dict): - ged_config = Props.read_from(channel_dict["geds_config"]) - - ged_channels = list( - chmap.geds.map("daq.rawid") - ) +if "geds_config" in channel_dict: + raise NotImplementedError() - ged_config[next(iter(ged_config))]["geds"]["key_list"] = sorted(ged_channels) - Props.add_to(all_config, ged_config) +if "spms_config" in channel_dict: + spm_config = Props.read_from(channel_dict.spms_config) + spm_channels = chmap.spms.map("daq.rawid") -if "spms_config" in list(channel_dict): - spm_config = Props.read_from(channel_dict["spms_config"]) + for rawid, chinfo in spm_channels.items(): + cfg_block = deepcopy(spm_config["FCEventDecoder"]["__output_table_name__"]) + cfg_block["key_list"] = [chinfo.daq.fc_channel] + spm_config["FCEventDecoder"][f"ch{rawid:07d}/raw"] = cfg_block - spm_channels = list( - chmap.spms.map("daq.rawid") - ) + spm_config["FCEventDecoder"].pop("__output_table_name__") - spm_config[next(iter(spm_config))]["spms"]["key_list"] = 
sorted(spm_channels) Props.add_to(all_config, spm_config) -if "auxs_config" in list(channel_dict): - aux_config = Props.read_from(channel_dict["auxs_config"]) - aux_channels = list( - chmap.auxs.map("daq.rawid") - ) - aux_channels += list( - chmap.puls.map("daq.rawid") - ) - aux_channels += list( - chmap.bsln.map("daq.rawid") - ) - top_key = next(iter(aux_config)) - aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted(aux_channels) - Props.add_to(all_config, aux_config) - -if "muon_config" in list(channel_dict): - muon_config = Props.read_from(channel_dict["muon_config"]) - muon_channels = list( - chmap.muon.map("daq.rawid") - ) - top_key = next(iter(muon_config)) - muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted(muon_channels) - Props.add_to(all_config, muon_config) +if "auxs_config" in channel_dict: + raise NotImplementedError() + +if "muon_config" in channel_dict: + raise NotImplementedError() rng = np.random.default_rng() rand_num = f"{rng.integers(0,99999):05d}" diff --git a/scripts/build_raw_orca.py b/scripts/build_raw_orca.py index 7e1dd1b..b307b01 100644 --- a/scripts/build_raw_orca.py +++ b/scripts/build_raw_orca.py @@ -4,8 +4,8 @@ import numpy as np from daq2lh5 import build_raw -from legendmeta import TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props from utils.log import build_log argparser = argparse.ArgumentParser() @@ -15,10 +15,10 @@ argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--configs", help="config file", type=str) argparser.add_argument("--chan_maps", help="chan map", type=str) -argparser.add_argument("--log", help="log file", type=str) +argparser.add_argument("--log", help="log file") args = argparser.parse_args() -Path(args.log).parent.makedir(parents=True, exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") Path(args.output).parent.mkdir(parents=True, exist_ok=True) diff --git a/scripts/util/log.py b/scripts/util/log.py index 79b97c5..9a9b191 100644 --- a/scripts/util/log.py +++ b/scripts/util/log.py @@ -22,7 +22,7 @@ def build_log(config_dict, log_file=None): log = logging.getLogger(config_dict["options"].get("logger", "prod")) else: if log_file is not None: - Path(log_file).parent.makedir(parents=True, exist_ok=True) + Path(log_file).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=log_file, filemode="w") log = logging.getLogger(__name__) return log From 3c2a166554630057cec669b6434ac54bc63b48dc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 13:02:35 +0100 Subject: [PATCH 038/101] allow filelist globbing for daq fcio/orca files --- Snakefile | 2 +- Snakefile-build-raw | 4 +--- rules/common.smk | 12 ++++++++++++ rules/filelist_gen.smk | 14 ++++++++++---- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/Snakefile b/Snakefile index 3a66e0a..945b4dd 100644 --- a/Snakefile +++ b/Snakefile @@ -166,7 +166,7 @@ rule gen_filelist: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), diff --git a/Snakefile-build-raw b/Snakefile-build-raw index ef05855..2ace6f7 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -88,14 +88,12 @@ 
include: "rules/raw.smk" include: "rules/blinding_check.smk" -# FIXME: cannot put extension="*", otherwise it won't be possible to extract -# keys (see FileKey.get_path_from_filekey()) rule gen_filelist: input: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_unsorted_data(setup, extension="fcio"), + get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), diff --git a/rules/common.smk b/rules/common.smk index 1f09470..da79753 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -107,3 +107,15 @@ def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): raise ValueError(f"Could not find name in {pars_files_overwrite}") else: return out_files + + +def get_search_pattern(tier): + """ + This func gets the search pattern for the relevant tier passed. + """ + if tier == "daq": + return get_pattern_unsorted_data(setup, extension="*") + elif tier == "raw": + return get_pattern_tier_daq(setup, extension="*") + else: + return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index e30b876..7975fa8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -3,7 +3,11 @@ import json, yaml from pathlib import Path from scripts.util.FileKey import FileKey, run_grouper -from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind +from scripts.util.patterns import ( + get_pattern_tier, + get_pattern_tier_raw_blind, + get_pattern_tier_daq, +) concat_datatypes = ["phy"] concat_tiers = ["skm", "pet_concat", "evt_concat"] @@ -114,6 +118,8 @@ def get_pattern(setup, tier): fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) elif tier == "evt_concat": fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) + elif tier == "daq": + fn_pattern = get_pattern_tier_daq(setup, extension="{ext}") else: fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) return fn_pattern @@ -158,13 +164,13 @@ def build_filelist( ignore_keys = [] if analysis_runs is None: analysis_runs = {} - phy_filenames = [] other_filenames = [] for key in filekeys: - fn_glob_pattern = key.get_path_from_filekey(search_pattern)[0] + if Path(search_pattern).suffix == ".*": + search_pattern = Path(str(search_pattern).replace(".*", ".{ext}")) + fn_glob_pattern = key.get_path_from_filekey(search_pattern, ext="*")[0] files = glob.glob(fn_glob_pattern) - for f in files: _key = FileKey.get_filekey_from_pattern(f, search_pattern) if _key.name in ignore_keys: From 1dcd0274c0c288cece654dc47b62ae671526a3cc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 16:46:55 +0100 Subject: [PATCH 039/101] have par catalog build support multiple file extensions, split out build raw rule into orca and fcio --- Snakefile-build-raw | 36 +++++------ rules/filelist_gen.smk | 2 +- rules/raw.smk | 98 ++++++++++++++++++++--------- scripts/util/create_pars_keylist.py | 10 ++- 4 files changed, 92 insertions(+), 54 deletions(-) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index e6c7c62..2635a5d 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -50,15 +50,11 @@ localrules: autogen_output, -# raw_par_catalog = ds.pars_key_resolve.get_par_catalog( -# ["-*-*-*-cal"], -# [ -# get_pattern_unsorted_data(setup), -# get_pattern_tier_daq(setup), -# get_pattern_tier(setup, "raw"), -# ], -# {"cal": ["par_raw"]}, -# ) +include: "rules/common.smk" +include: "rules/filelist_gen.smk" +include: 
"rules/main.smk" +include: "rules/raw.smk" +include: "rules/blinding_check.smk" onstart: @@ -67,12 +63,17 @@ onstart: # Make sure some packages are initialized before we begin to avoid race conditions shell('{swenv} python3 -B -c "import daq2lh5 "') - # raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") - # if os.path.isfile(raw_par_cat_file): - # os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) - # pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir(parents=True, exist_ok=True) - # ds.pars_key_resolve.write_to_jsonl(raw_par_catalog, raw_par_cat_file) + raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") + if os.path.isfile(raw_par_cat_file): + os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) + try: + pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir( + parents=True, exist_ok=True + ) + ds.pars_key_resolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) + except NameError: + pass onsuccess: @@ -81,13 +82,6 @@ onsuccess: shell(f"rm {filelist_path(setup)}/* || true") -include: "rules/common.smk" -include: "rules/filelist_gen.smk" -include: "rules/main.smk" -include: "rules/raw.smk" -include: "rules/blinding_check.smk" - - rule gen_filelist: input: lambda wildcards: get_filelist( diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 7975fa8..24a94f5 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -168,7 +168,7 @@ def build_filelist( other_filenames = [] for key in filekeys: if Path(search_pattern).suffix == ".*": - search_pattern = Path(str(search_pattern).replace(".*", ".{ext}")) + search_pattern = Path(search_pattern).with_suffix(".{ext}") fn_glob_pattern = key.get_path_from_filekey(search_pattern, ext="*")[0] files = glob.glob(fn_glob_pattern) for f in files: diff --git a/rules/raw.smk b/rules/raw.smk index 59054ce..fd95467 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -1,44 +1,82 @@ from scripts.util.patterns import ( + get_pattern_tier_daq_unsorted, get_pattern_tier_daq, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, ) from scripts.util.utils import set_last_rule_name +from scripts.util.create_pars_keylist import ParsKeyResolve + +raw_par_catalog = ParsKeyResolve.get_par_catalog( + ["-*-*-*-cal"], + [ + get_pattern_tier_daq_unsorted(setup, extension="*"), + get_pattern_tier_daq(setup, extension="*"), + get_pattern_tier(setup, "raw", check_in_cycle=False), + ], + {"cal": ["par_raw"]}, +) -for daq_ext in ("orca", "fcio"): +rule build_raw_orca: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension="orca"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_orca.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" - rule: - """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file - """ - input: - get_pattern_tier_daq(setup, extension=daq_ext), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: ro(input), - output: - get_pattern_tier(setup, "raw", 
check_in_cycle=check_in_cycle), - log: - get_pattern_log(setup, "tier_raw"), - group: - "tier-raw" - resources: - mem_swap=110, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}" + f"/../scripts/build_raw_{daq_ext}.py " - "--log {log} " - f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "{params.ro_input} {output}" - set_last_rule_name(workflow, f"build_raw_{daq_ext}") +rule build_raw_fcio: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension="fcio"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_fcio.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" rule build_raw_blind: diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index c3e1f22..a82ef0c 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -99,12 +99,16 @@ def match_all_entries(entrylist, name_dict): @staticmethod def get_keys(keypart, search_pattern): d = FileKey.parse_keypart(keypart) + if Path(search_pattern).suffix == ".*": + search_pattern = Path(search_pattern).with_suffix(".{ext}") + wildcard_dict = dict(ext="*", **d._asdict()) + else: + wildcard_dict = d._asdict() try: tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(str(search_pattern))) - except AttributeError: tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) - fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] + fn_glob_pattern = smk.io.expand(search_pattern, **wildcard_dict)[0] p = Path(fn_glob_pattern) parts = p.parts[p.is_absolute() :] files = Path(p.root).glob(str(Path(*parts))) @@ -113,6 +117,8 @@ def get_keys(keypart, search_pattern): m = tier_pattern_rx.match(str(f)) if m is not None: d = m.groupdict() + if "ext" in d: + d.pop("ext") key = FileKey(**d) keys.append(key) return keys From 0438539594fae88597baf3edea099be3b293829a Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 17:50:40 +0100 Subject: [PATCH 040/101] fix par catalog write --- Snakefile-build-raw | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 2635a5d..7a4779f 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -19,6 +19,7 @@ from scripts.util.utils import ( det_status_path, ) import scripts.util as ds +from scripts.util.create_pars_keylist import ParsKeyResolve check_in_cycle = True @@ -71,7 +72,7 @@ onstart: pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir( parents=True, exist_ok=True ) - ds.pars_key_resolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) + ParsKeyResolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) except NameError: pass From 25a6183e9416437ff7617d7403f1749be9810ea1 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 18:28:13 +0100 Subject: [PATCH 041/101] fix daq filelist --- rules/common.smk | 5 ++++- rules/filelist_gen.smk | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/rules/common.smk b/rules/common.smk 
index 288d06c..5625c79 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -4,6 +4,9 @@ Helper functions for running data production from pathlib import Path from scripts.util.patterns import ( + get_pattern_tier_daq_unsorted, + get_pattern_tier_daq, + get_pattern_tier, par_overwrite_path, get_pars_path, ) @@ -110,7 +113,7 @@ def get_search_pattern(tier): This func gets the search pattern for the relevant tier passed. """ if tier == "daq": - return get_pattern_unsorted_data(setup, extension="*") + return get_pattern_tier_daq_unsorted(setup, extension="*") elif tier == "raw": return get_pattern_tier_daq(setup, extension="*") else: diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 24a94f5..b3255f8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -184,6 +184,10 @@ def build_filelist( filename = FileKey.get_path_from_filekey( _key, get_pattern_tier(setup, "pet", check_in_cycle=False) ) + elif tier == "daq": + filename = FileKey.get_path_from_filekey( + _key, fn_pattern.with_suffix(Path(f).suffix) + ) else: filename = FileKey.get_path_from_filekey(_key, fn_pattern) From 325c92039d69c21607a672e3b11c01cc589aa4cd Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 13:02:35 +0100 Subject: [PATCH 042/101] allow filelist globbing for daq fcio/orca files --- Snakefile | 2 +- Snakefile-build-raw | 4 +--- rules/common.smk | 12 ++++++++++++ rules/filelist_gen.smk | 14 ++++++++++---- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/Snakefile b/Snakefile index 3a66e0a..945b4dd 100644 --- a/Snakefile +++ b/Snakefile @@ -166,7 +166,7 @@ rule gen_filelist: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), diff --git a/Snakefile-build-raw b/Snakefile-build-raw index fd9e795..e6c7c62 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -88,14 +88,12 @@ include: "rules/raw.smk" include: "rules/blinding_check.smk" -# FIXME: cannot put extension="*", otherwise it won't be possible to extract -# keys (see FileKey.get_path_from_filekey()) rule gen_filelist: input: lambda wildcards: get_filelist( wildcards, setup, - patt.get_pattern_tier_daq(setup, extension="fcio"), + get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), diff --git a/rules/common.smk b/rules/common.smk index a259601..288d06c 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -103,3 +103,15 @@ def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): raise ValueError(f"Could not find name in {pars_files_overwrite}") else: return out_files + + +def get_search_pattern(tier): + """ + This func gets the search pattern for the relevant tier passed. 
+ """ + if tier == "daq": + return get_pattern_unsorted_data(setup, extension="*") + elif tier == "raw": + return get_pattern_tier_daq(setup, extension="*") + else: + return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index e30b876..7975fa8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -3,7 +3,11 @@ import json, yaml from pathlib import Path from scripts.util.FileKey import FileKey, run_grouper -from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind +from scripts.util.patterns import ( + get_pattern_tier, + get_pattern_tier_raw_blind, + get_pattern_tier_daq, +) concat_datatypes = ["phy"] concat_tiers = ["skm", "pet_concat", "evt_concat"] @@ -114,6 +118,8 @@ def get_pattern(setup, tier): fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) elif tier == "evt_concat": fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) + elif tier == "daq": + fn_pattern = get_pattern_tier_daq(setup, extension="{ext}") else: fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) return fn_pattern @@ -158,13 +164,13 @@ def build_filelist( ignore_keys = [] if analysis_runs is None: analysis_runs = {} - phy_filenames = [] other_filenames = [] for key in filekeys: - fn_glob_pattern = key.get_path_from_filekey(search_pattern)[0] + if Path(search_pattern).suffix == ".*": + search_pattern = Path(str(search_pattern).replace(".*", ".{ext}")) + fn_glob_pattern = key.get_path_from_filekey(search_pattern, ext="*")[0] files = glob.glob(fn_glob_pattern) - for f in files: _key = FileKey.get_filekey_from_pattern(f, search_pattern) if _key.name in ignore_keys: From 8197a3f94b08f5c3a95a1fd61abe12f0b1f666c2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 16:46:55 +0100 Subject: [PATCH 043/101] have par catalog build support multiple file extensions, split out build raw rule into orca and fcio --- Snakefile-build-raw | 37 +++++------ rules/common.smk | 5 +- rules/filelist_gen.smk | 6 +- rules/raw.smk | 98 ++++++++++++++++++++--------- scripts/util/create_pars_keylist.py | 10 ++- 5 files changed, 101 insertions(+), 55 deletions(-) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index e6c7c62..7a4779f 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -19,6 +19,7 @@ from scripts.util.utils import ( det_status_path, ) import scripts.util as ds +from scripts.util.create_pars_keylist import ParsKeyResolve check_in_cycle = True @@ -50,15 +51,11 @@ localrules: autogen_output, -# raw_par_catalog = ds.pars_key_resolve.get_par_catalog( -# ["-*-*-*-cal"], -# [ -# get_pattern_unsorted_data(setup), -# get_pattern_tier_daq(setup), -# get_pattern_tier(setup, "raw"), -# ], -# {"cal": ["par_raw"]}, -# ) +include: "rules/common.smk" +include: "rules/filelist_gen.smk" +include: "rules/main.smk" +include: "rules/raw.smk" +include: "rules/blinding_check.smk" onstart: @@ -67,12 +64,17 @@ onstart: # Make sure some packages are initialized before we begin to avoid race conditions shell('{swenv} python3 -B -c "import daq2lh5 "') - # raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") - # if os.path.isfile(raw_par_cat_file): - # os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) - # pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir(parents=True, exist_ok=True) - # ds.pars_key_resolve.write_to_jsonl(raw_par_catalog, raw_par_cat_file) + raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") + if 
os.path.isfile(raw_par_cat_file): + os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) + try: + pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir( + parents=True, exist_ok=True + ) + ParsKeyResolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) + except NameError: + pass onsuccess: @@ -81,13 +83,6 @@ onsuccess: shell(f"rm {filelist_path(setup)}/* || true") -include: "rules/common.smk" -include: "rules/filelist_gen.smk" -include: "rules/main.smk" -include: "rules/raw.smk" -include: "rules/blinding_check.smk" - - rule gen_filelist: input: lambda wildcards: get_filelist( diff --git a/rules/common.smk b/rules/common.smk index 288d06c..5625c79 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -4,6 +4,9 @@ Helper functions for running data production from pathlib import Path from scripts.util.patterns import ( + get_pattern_tier_daq_unsorted, + get_pattern_tier_daq, + get_pattern_tier, par_overwrite_path, get_pars_path, ) @@ -110,7 +113,7 @@ def get_search_pattern(tier): This func gets the search pattern for the relevant tier passed. """ if tier == "daq": - return get_pattern_unsorted_data(setup, extension="*") + return get_pattern_tier_daq_unsorted(setup, extension="*") elif tier == "raw": return get_pattern_tier_daq(setup, extension="*") else: diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 7975fa8..b3255f8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -168,7 +168,7 @@ def build_filelist( other_filenames = [] for key in filekeys: if Path(search_pattern).suffix == ".*": - search_pattern = Path(str(search_pattern).replace(".*", ".{ext}")) + search_pattern = Path(search_pattern).with_suffix(".{ext}") fn_glob_pattern = key.get_path_from_filekey(search_pattern, ext="*")[0] files = glob.glob(fn_glob_pattern) for f in files: @@ -184,6 +184,10 @@ def build_filelist( filename = FileKey.get_path_from_filekey( _key, get_pattern_tier(setup, "pet", check_in_cycle=False) ) + elif tier == "daq": + filename = FileKey.get_path_from_filekey( + _key, fn_pattern.with_suffix(Path(f).suffix) + ) else: filename = FileKey.get_path_from_filekey(_key, fn_pattern) diff --git a/rules/raw.smk b/rules/raw.smk index 59054ce..fd95467 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -1,44 +1,82 @@ from scripts.util.patterns import ( + get_pattern_tier_daq_unsorted, get_pattern_tier_daq, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, ) from scripts.util.utils import set_last_rule_name +from scripts.util.create_pars_keylist import ParsKeyResolve + +raw_par_catalog = ParsKeyResolve.get_par_catalog( + ["-*-*-*-cal"], + [ + get_pattern_tier_daq_unsorted(setup, extension="*"), + get_pattern_tier_daq(setup, extension="*"), + get_pattern_tier(setup, "raw", check_in_cycle=False), + ], + {"cal": ["par_raw"]}, +) -for daq_ext in ("orca", "fcio"): +rule build_raw_orca: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension="orca"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_orca.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} 
{output}" - rule: - """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file - """ - input: - get_pattern_tier_daq(setup, extension=daq_ext), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: ro(input), - output: - get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), - log: - get_pattern_log(setup, "tier_raw"), - group: - "tier-raw" - resources: - mem_swap=110, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}" + f"/../scripts/build_raw_{daq_ext}.py " - "--log {log} " - f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "{params.ro_input} {output}" - set_last_rule_name(workflow, f"build_raw_{daq_ext}") +rule build_raw_fcio: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension="fcio"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_fcio.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" rule build_raw_blind: diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index c3e1f22..a82ef0c 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -99,12 +99,16 @@ def match_all_entries(entrylist, name_dict): @staticmethod def get_keys(keypart, search_pattern): d = FileKey.parse_keypart(keypart) + if Path(search_pattern).suffix == ".*": + search_pattern = Path(search_pattern).with_suffix(".{ext}") + wildcard_dict = dict(ext="*", **d._asdict()) + else: + wildcard_dict = d._asdict() try: tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(str(search_pattern))) - except AttributeError: tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) - fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] + fn_glob_pattern = smk.io.expand(search_pattern, **wildcard_dict)[0] p = Path(fn_glob_pattern) parts = p.parts[p.is_absolute() :] files = Path(p.root).glob(str(Path(*parts))) @@ -113,6 +117,8 @@ def get_keys(keypart, search_pattern): m = tier_pattern_rx.match(str(f)) if m is not None: d = m.groupdict() + if "ext" in d: + d.pop("ext") key = FileKey(**d) keys.append(key) return keys From 48b326dbd4eadbd0c8334320d0af4a27fbadfd7f Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Mon, 20 Jan 2025 19:10:24 +0100 Subject: [PATCH 044/101] A lot of fixes in complete_run.py --- rules/main.smk | 2 +- rules/raw.smk | 4 +-- scripts/build_fdb.py | 2 +- scripts/check_blinding.py | 2 +- scripts/complete_run.py | 51 +++++++++++++++++++++------------------ scripts/util/__init__.py | 16 ++++++------ scripts/util/utils.py | 4 +-- 7 files changed, 42 insertions(+), 39 deletions(-) diff --git a/rules/main.smk b/rules/main.smk index 153fab4..be671c0 100644 --- a/rules/main.smk +++ b/rules/main.smk @@ -48,6 +48,6 @@ rule autogen_output: filedb_path=os.path.join(pars_path(setup), "filedb"), setup=lambda wildcards: setup, basedir=basedir, - threads: workflow.cores + threads: min(workflow.cores, 64) script: "../scripts/complete_run.py" diff 
--git a/rules/raw.smk b/rules/raw.smk index fd95467..411b23f 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -21,7 +21,7 @@ raw_par_catalog = ParsKeyResolve.get_par_catalog( rule build_raw_orca: """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + This rule runs build_raw(), it takes in a file.fcio and outputs a raw file """ input: get_pattern_tier_daq(setup, extension="orca"), @@ -51,7 +51,7 @@ rule build_raw_orca: rule build_raw_fcio: """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + This rule runs build_raw(), it takes in a file.fcio and outputs a raw file """ input: get_pattern_tier_daq(setup, extension="fcio"), diff --git a/scripts/build_fdb.py b/scripts/build_fdb.py index b9c127b..f628341 100644 --- a/scripts/build_fdb.py +++ b/scripts/build_fdb.py @@ -3,7 +3,7 @@ from pathlib import Path import numpy as np -from legendmeta.catalog import Props +from dbetto.catalog import Props from lgdo import lh5 from pygama.flow.file_db import FileDB diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index 44261a5..4298c6e 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -84,7 +84,7 @@ # check for peaks within +- 5keV of 2614 and 583 to ensure blinding still # valid and if so create file else raise error. if detector is in ac mode it # will always pass this check -if np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) or det_status is False: +if (np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5)) or det_status is False: Path(args.output).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output, {}) else: diff --git a/scripts/complete_run.py b/scripts/complete_run.py index e4c5eb3..03cfd51 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -3,6 +3,7 @@ import datetime import json import os +import subprocess import time from pathlib import Path @@ -157,12 +158,12 @@ def find_gen_runs(gen_tier_path): # first look for non-concat tiers paths = gen_tier_path.glob("*/*/*/*") # use the directories to build a datatype/period/run string - runs = {"/".join(p.name.split("/")[-3:]) for p in paths} + runs = {"/".join(str(p).split("/")[-3:]) for p in paths} # then look for concat tiers (use filenames now) paths_concat = gen_tier_path.glob("*/*/*.lh5") # use the directories to build a datatype/period/run string - runs_concat = {"/".join([p.name.split("-")[3]] + p.name.split("-")[1:3]) for p in paths_concat} + runs_concat = {"/".join([str(p).split("-")[3]] + str(p).split("-")[1:3]) for p in paths_concat} return runs | runs_concat @@ -188,30 +189,32 @@ def build_file_dbs(gen_tier_path, outdir): logfile = Path(ut.tmp_log_path(snakemake.params.setup)) / outfile.with_suffix(".log").name print(f"INFO: ......building {outfile}") - cmdline = ut.runcmd(snakemake.params.setup, aslist=True) - prodenv = as_ro(os.getenv("PRODENV")) - cmdline += [f"--env=PRODENV={prodenv}"] + cmdline = [ + *ut.runcmd(snakemake.params.setup, aslist=True), + "--", + "python3", + "-B", + f"{snakemake.params.basedir}/scripts/build_fdb.py", + "--scan-path", + spec, + "--output", + str(outfile), + "--config", + str(outdir / "file_db_config.json"), + "--log", + str(logfile), + ] + + if speck[0] == "phy": + cmdline += ["--assume-nonsparse"] + + print(cmdline) + print(" ".join(cmdline)) + + cmdenv = {} # TODO: forward stdout to log file - processes.add( - subprocess.Popen( - [ - *cmdline, - "python3", - "-B", - f"{snakemake.params.basedir}/scripts/build_fdb.py", - "--scan-path", 
- spec, - "--output", - str(outfile), - "--config", - str(outdir / "file_db_config.json"), - "--log", - str(logfile), - "--assume-nonsparse" if speck[0] == "phy" else "", - ], - ) - ) + processes.add(subprocess.Popen(cmdline)) if len(processes) >= snakemake.threads: os.wait() diff --git a/scripts/util/__init__.py b/scripts/util/__init__.py index caa4dd2..d103033 100644 --- a/scripts/util/__init__.py +++ b/scripts/util/__init__.py @@ -12,18 +12,18 @@ ) __all__ = [ - "Props", - "PropsStream", - "Catalog", - "ParsKeyResolve", "CalGrouping", - "FileKey", - "ProcessingFileKey", + "Catalog", "ChannelProcKey", + "FileKey", "ParsCatalog", - "unix_time", + "ParsKeyResolve", + "ProcessingFileKey", + "Props", + "PropsStream", "runcmd", - "subst_vars_impl", "subst_vars", + "subst_vars_impl", "subst_vars_in_snakemake_config", + "unix_time", ] diff --git a/scripts/util/utils.py b/scripts/util/utils.py index 9d64b06..0b45a81 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -110,9 +110,9 @@ def filelist_path(setup): def runcmd(setup, aslist=False): cmdline = shlex.split(setup["execenv"]["cmd"]) - cmdline += ["--env=" + "'PYTHONUSERBASE=" + f"{setup['paths']['install']}" + "'"] + cmdline += ["--env=" + "PYTHONUSERBASE=" + f"{setup['paths']['install']}"] if "env" in setup["execenv"]: - cmdline += [f'--env="{var}={val}"' for var, val in setup["execenv"]["env"].items()] + cmdline += [f"--env={var}={val}" for var, val in setup["execenv"]["env"].items()] cmdline += shlex.split(setup["execenv"]["arg"]) From 0b558ddb43988f1134e58d9dce61c9c8b1b295ea Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 21:22:28 +0100 Subject: [PATCH 045/101] fix weird filelist len bug by moving to script --- Snakefile-build-raw | 13 ++----------- scripts/write_filelist.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 11 deletions(-) create mode 100644 scripts/write_filelist.py diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 7a4779f..c4fb1dd 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -94,17 +94,8 @@ rule gen_filelist: ), output: temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), - run: - print(f"INFO: found {len(input)} files") - if len(input) == 0: - print( - f"WARNING: no DAQ files found for the given pattern: {wildcards.label}. " - "make sure patterns follows the format: " - "all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" - ) - with open(output[0], "w") as f: - for fn in input: - f.write(f"{fn}\n") + script: + "scripts/write_filelist.py" rule sort_data: diff --git a/scripts/write_filelist.py b/scripts/write_filelist.py new file mode 100644 index 0000000..f27c2ad --- /dev/null +++ b/scripts/write_filelist.py @@ -0,0 +1,14 @@ +# ruff: noqa: F821, T201 +# from snakemake.script import snakemake # snakemake > 8.16 +from pathlib import Path + +print(f"INFO: found {len(snakemake.input)} files") +if len(snakemake.input) == 0: + print( + f"WARNING: no DAQ files found for the given pattern: {snakemake.wildcards.label}. 
" + "make sure patterns follows the format: " + "all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" + ) +with Path(snakemake.output[0]).open("w") as f: + for fn in snakemake.input: + f.write(f"{fn}\n") From 689164bcc2ecee28bbead6d7c83f30d6dca7d6e4 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 23:30:50 +0100 Subject: [PATCH 046/101] fix log import --- scripts/build_dsp.py | 2 +- scripts/build_hit.py | 2 +- scripts/build_raw_orca.py | 2 +- scripts/build_skm.py | 2 +- scripts/build_tcm.py | 2 +- scripts/pars_dsp_build_svm.py | 2 +- scripts/pars_dsp_dplms.py | 2 +- scripts/pars_dsp_eopt.py | 2 +- scripts/pars_dsp_event_selection.py | 2 +- scripts/pars_dsp_nopt.py | 2 +- scripts/pars_dsp_tau.py | 2 +- scripts/pars_hit_aoe.py | 2 +- scripts/pars_hit_ecal.py | 2 +- scripts/pars_hit_lq.py | 2 +- scripts/pars_hit_qc.py | 2 +- scripts/pars_pht_aoecal.py | 2 +- scripts/pars_pht_fast.py | 2 +- scripts/pars_pht_lqcal.py | 2 +- scripts/pars_pht_partcal.py | 2 +- scripts/pars_pht_qc.py | 2 +- scripts/pars_pht_qc_phy.py | 2 +- scripts/pars_tcm_pulser.py | 2 +- 22 files changed, 22 insertions(+), 22 deletions(-) diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index f7b4141..603124d 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -9,7 +9,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 -from utils.log import build_log +from util.log import build_log def replace_list_with_array(dic): diff --git a/scripts/build_hit.py b/scripts/build_hit.py index cec39b7..cd48f7c 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -6,7 +6,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from pygama.hit.build_hit import build_hit -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) diff --git a/scripts/build_raw_orca.py b/scripts/build_raw_orca.py index b307b01..711ecdd 100644 --- a/scripts/build_raw_orca.py +++ b/scripts/build_raw_orca.py @@ -6,7 +6,7 @@ from daq2lh5 import build_raw from dbetto import TextDB from dbetto.catalog import Props -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/scripts/build_skm.py b/scripts/build_skm.py index c8ff972..cfd52e0 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -5,7 +5,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from lgdo.types import Array, Struct, Table, VectorOfVectors -from utils.log import build_log +from util.log import build_log def get_all_out_fields(input_table, out_fields, current_field=""): diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index 4707410..156e4c8 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -7,7 +7,7 @@ from legendmeta import TextDB from legendmeta.catalog import Props from pygama.evt.build_tcm import build_tcm -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index a31a8c1..b9174ec 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -6,7 +6,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from sklearn.svm import SVC -from utils.log import build_log +from util.log import build_log argparser 
= argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 64c7a9f..3e99228 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -10,7 +10,7 @@ from legendmeta.catalog import Props from lgdo import Array, Table from pygama.pargen.dplms_ge_dict import dplms_ge_dict -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 5e9a009..c95842d 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -17,7 +17,7 @@ run_bayesian_optimisation, run_one_dsp, ) -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 9999134..d5a924c 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -14,7 +14,7 @@ from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 85883b8..766159c 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -10,7 +10,7 @@ from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp -from utils.log import build_log +from util.log import build_log sto = lh5.LH5Store() diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index 4f3cf9d..b45a801 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -9,7 +9,7 @@ from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.extract_tau import ExtractTau -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index 4d3f503..c61322c 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -15,7 +15,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index aab5f41..b8ba61a 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -23,7 +23,7 @@ from pygama.pargen.utils import load_data from scipy.stats import binned_statistic from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) mpl.use("agg") diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 3487c38..48811ad 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -16,7 +16,7 @@ from pygama.pargen.lq_cal import LQCal from 
pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 6b3369f..d68aaeb 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -19,7 +19,7 @@ ) from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index 91ae176..0591f53 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -18,7 +18,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index b8d48d2..f916ad3 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -16,7 +16,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 101acea..7185ab1 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -18,7 +18,7 @@ from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 6eb25eb..228107e 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -18,7 +18,7 @@ from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index f3f634b..e79014f 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -19,7 +19,7 @@ ) from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index e642aa3..628a104 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -18,7 +18,7 @@ get_keys, ) from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index 4ae8843..c48338a 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -7,7 +7,7 @@ from legendmeta import 
LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import get_tcm_pulser_ids -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) From 2c47ca94d71090a1eba293007f5e79c4441b0b46 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 21 Jan 2025 14:43:55 +0100 Subject: [PATCH 047/101] Remove leftover print statements --- scripts/complete_run.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/complete_run.py b/scripts/complete_run.py index 03cfd51..c462367 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -208,9 +208,6 @@ def build_file_dbs(gen_tier_path, outdir): if speck[0] == "phy": cmdline += ["--assume-nonsparse"] - print(cmdline) - print(" ".join(cmdline)) - cmdenv = {} # TODO: forward stdout to log file From 35e8b562542c88243a6f6a87aecf8c96a7496726 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 22 Jan 2025 00:00:11 +0100 Subject: [PATCH 048/101] Rename scripts.util to scripts.library --- .ruff.toml | 1 - Snakefile | 10 +++++----- rules/ann.smk | 2 +- rules/blinding_calibration.smk | 2 +- rules/blinding_check.smk | 2 +- rules/chanlist_gen.smk | 6 +++--- rules/common.smk | 8 ++++---- rules/dsp.smk | 8 ++++---- rules/evt.smk | 4 ++-- rules/filelist_gen.smk | 4 ++-- rules/hit.smk | 6 +++--- rules/main.smk | 2 +- rules/pht.smk | 8 ++++---- rules/pht_fast.smk | 8 ++++---- rules/psp.smk | 8 ++++---- rules/qc_phy.smk | 8 ++++---- rules/raw.smk | 6 +++--- rules/skm.smk | 2 +- rules/tcm.smk | 2 +- scripts/build_dsp.py | 2 +- scripts/build_evt.py | 2 +- scripts/build_hit.py | 2 +- scripts/build_raw_blind.py | 2 +- scripts/build_raw_fcio.py | 2 +- scripts/build_raw_orca.py | 2 +- scripts/build_skm.py | 2 +- scripts/build_tcm.py | 2 +- scripts/check_blinding.py | 2 +- scripts/complete_run.py | 6 +++--- scripts/{util => library}/FileKey.py | 0 scripts/{util => library}/__init__.py | 0 scripts/{util => library}/cal_grouping.py | 0 scripts/{util => library}/catalog.py | 0 scripts/{util => library}/convert_np.py | 0 scripts/{util => library}/create_pars_keylist.py | 0 scripts/{util => library}/log.py | 0 scripts/{util => library}/pars_loading.py | 0 scripts/{util => library}/patterns.py | 0 scripts/{util => library}/utils.py | 0 scripts/merge_channels.py | 2 +- scripts/par_psp.py | 2 +- scripts/pars_dsp_build_svm.py | 2 +- scripts/pars_dsp_dplms.py | 2 +- scripts/pars_dsp_eopt.py | 2 +- scripts/pars_dsp_event_selection.py | 2 +- scripts/pars_dsp_nopt.py | 2 +- scripts/pars_dsp_tau.py | 2 +- scripts/pars_hit_aoe.py | 4 ++-- scripts/pars_hit_ecal.py | 4 ++-- scripts/pars_hit_lq.py | 4 ++-- scripts/pars_hit_qc.py | 4 ++-- scripts/pars_pht_aoecal.py | 4 ++-- scripts/pars_pht_fast.py | 4 ++-- scripts/pars_pht_lqcal.py | 4 ++-- scripts/pars_pht_partcal.py | 4 ++-- scripts/pars_pht_qc.py | 4 ++-- scripts/pars_pht_qc_phy.py | 4 ++-- scripts/pars_tcm_pulser.py | 2 +- tests/test_util.py | 6 +++--- 59 files changed, 92 insertions(+), 93 deletions(-) rename scripts/{util => library}/FileKey.py (100%) rename scripts/{util => library}/__init__.py (100%) rename scripts/{util => library}/cal_grouping.py (100%) rename scripts/{util => library}/catalog.py (100%) rename scripts/{util => library}/convert_np.py (100%) rename scripts/{util => library}/create_pars_keylist.py (100%) rename scripts/{util => library}/log.py (100%) rename scripts/{util => library}/pars_loading.py (100%) rename scripts/{util => 
library}/patterns.py (100%) rename scripts/{util => library}/utils.py (100%) diff --git a/.ruff.toml b/.ruff.toml index 8b4d420..bd28747 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -29,7 +29,6 @@ lint.extend-ignore = [ "G004", # Logging statement uses f-string "PLR", # Design related pylint codes "E501", # Line too long - "PT004", # Use underscore for non-returning fixture (use usefixture instead) "RET505", # Unnecessary `else` after `return` statement "E402", # Module level imports at top of file need disable for numba default imports "NPY201", # Numpy 2 warnings ignore for the moment diff --git a/Snakefile b/Snakefile index f9168c3..10a6855 100644 --- a/Snakefile +++ b/Snakefile @@ -18,10 +18,10 @@ from datetime import datetime from collections import OrderedDict import logging -import scripts.util as ds -from scripts.util.pars_loading import ParsCatalog -from scripts.util.patterns import get_pattern_tier -from scripts.util.utils import ( +import scripts.library as lib +from scripts.library.pars_loading import ParsCatalog +from scripts.library.patterns import get_pattern_tier +from scripts.library.utils import ( subst_vars_in_snakemake_config, runcmd, config_path, @@ -45,7 +45,7 @@ chan_maps = chan_map_path(setup) meta = metadata_path(setup) det_status = det_status_path(setup) swenv = runcmd(setup) -part = ds.CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") +part = lib.CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") basedir = workflow.basedir diff --git a/rules/ann.smk b/rules/ann.smk index 15558ae..f1a47cd 100644 --- a/rules/ann.smk +++ b/rules/ann.smk @@ -4,7 +4,7 @@ to apply the ann and risetime cuts for psd. """ -from scripts.util.patterns import ( +from scripts.library.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars, diff --git a/rules/blinding_calibration.smk b/rules/blinding_calibration.smk index 85ee2f6..d28072f 100644 --- a/rules/blinding_calibration.smk +++ b/rules/blinding_calibration.smk @@ -4,7 +4,7 @@ Snakemake rules for calibrating daq energy for blinding. Two steps: - combining all channels into single par file """ -from scripts.util.patterns import ( +from scripts.library.patterns import ( get_pattern_pars, get_pattern_plts, get_pattern_pars_tmp_channel, diff --git a/rules/blinding_check.smk b/rules/blinding_check.smk index eb3407d..e556abb 100644 --- a/rules/blinding_check.smk +++ b/rules/blinding_check.smk @@ -4,7 +4,7 @@ Snakemake rules for checking blinding. 
Two steps: - combining all channel check files into single check file """ -from scripts.util.patterns import ( +from scripts.library.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/rules/chanlist_gen.smk b/rules/chanlist_gen.smk index 820d0fa..68c33e4 100644 --- a/rules/chanlist_gen.smk +++ b/rules/chanlist_gen.smk @@ -4,12 +4,12 @@ import os import random import re -from scripts.util.FileKey import ChannelProcKey -from scripts.util.patterns import ( +from scripts.library.FileKey import ChannelProcKey +from scripts.library.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, ) -from scripts.util.utils import filelist_path, runcmd +from scripts.library.utils import filelist_path, runcmd def get_par_chanlist( diff --git a/rules/common.smk b/rules/common.smk index 5625c79..2f8a82f 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -3,16 +3,16 @@ Helper functions for running data production """ from pathlib import Path -from scripts.util.patterns import ( +from scripts.library.patterns import ( get_pattern_tier_daq_unsorted, get_pattern_tier_daq, get_pattern_tier, par_overwrite_path, get_pars_path, ) -from scripts.util import ProcessingFileKey -from scripts.util.catalog import Catalog -from scripts.util import utils +from scripts.library import ProcessingFileKey +from scripts.library.catalog import Catalog +from scripts.library import utils def ro(path): diff --git a/rules/dsp.smk b/rules/dsp.smk index 66a18c8..8000fa2 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -6,11 +6,11 @@ Snakemake rules for processing dsp tier. This is done in 4 steps: - running dsp over all channels using par file """ -from scripts.util.pars_loading import ParsCatalog -from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.library.pars_loading import ParsCatalog +from scripts.library.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.util.create_pars_keylist import ParsKeyResolve -from scripts.util.patterns import ( +from scripts.library.create_pars_keylist import ParsKeyResolve +from scripts.library.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/rules/evt.smk b/rules/evt.smk index 112c92c..4e96a85 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -2,8 +2,8 @@ Snakemake rules for processing evt tier. """ -from scripts.util.pars_loading import ParsCatalog -from scripts.util.patterns import ( +from scripts.library.pars_loading import ParsCatalog +from scripts.library.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars, diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index b3255f8..5e1857f 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -2,8 +2,8 @@ import glob import json, yaml from pathlib import Path -from scripts.util.FileKey import FileKey, run_grouper -from scripts.util.patterns import ( +from scripts.library.FileKey import FileKey, run_grouper +from scripts.library.patterns import ( get_pattern_tier, get_pattern_tier_raw_blind, get_pattern_tier_daq, diff --git a/rules/hit.smk b/rules/hit.smk index bb42651..5ea14ff 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -6,10 +6,10 @@ Snakemake rules for processing hit tier. 
This is done in 4 steps: - running build hit over all channels using par file """ -from scripts.util.pars_loading import ParsCatalog -from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.library.pars_loading import ParsCatalog +from scripts.library.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.util.patterns import ( +from scripts.library.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/rules/main.smk b/rules/main.smk index be671c0..f227f17 100644 --- a/rules/main.smk +++ b/rules/main.smk @@ -1,6 +1,6 @@ import os from datetime import datetime -from scripts.util.utils import ( +from scripts.library.utils import ( filelist_path, log_path, tmp_par_path, diff --git a/rules/pht.smk b/rules/pht.smk index e638832..239e3c5 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -6,11 +6,11 @@ Snakemake rules for processing pht (partition hit) tier data. This is done in 4 - running build hit over all channels using par file """ -from scripts.util.pars_loading import ParsCatalog -from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.library.pars_loading import ParsCatalog +from scripts.library.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.util.utils import filelist_path, set_last_rule_name -from scripts.util.patterns import ( +from scripts.library.utils import filelist_path, set_last_rule_name +from scripts.library.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index 9369b6b..b177f12 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -1,7 +1,7 @@ -from scripts.util.pars_loading import ParsCatalog -from scripts.util.create_pars_keylist import ParsKeyResolve -from scripts.util.utils import filelist_path, set_last_rule_name -from scripts.util.patterns import ( +from scripts.library.pars_loading import ParsCatalog +from scripts.library.create_pars_keylist import ParsKeyResolve +from scripts.library.utils import filelist_path, set_last_rule_name +from scripts.library.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/rules/psp.smk b/rules/psp.smk index bde834d..eed63ae 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -6,11 +6,11 @@ Snakemake rules for processing pht (partition hit) tier data. 
This is done in 4 - running build hit over all channels using par file """ -from scripts.util.pars_loading import ParsCatalog -from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.library.pars_loading import ParsCatalog +from scripts.library.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.util.utils import set_last_rule_name -from scripts.util.patterns import ( +from scripts.library.utils import set_last_rule_name +from scripts.library.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/rules/qc_phy.smk b/rules/qc_phy.smk index b89d8d3..d7a10f4 100644 --- a/rules/qc_phy.smk +++ b/rules/qc_phy.smk @@ -1,7 +1,7 @@ -from scripts.util.pars_loading import ParsCatalog -from scripts.util.create_pars_keylist import ParsKeyResolve -from scripts.util.utils import filelist_path, set_last_rule_name -from scripts.util.patterns import ( +from scripts.library.pars_loading import ParsCatalog +from scripts.library.create_pars_keylist import ParsKeyResolve +from scripts.library.utils import filelist_path, set_last_rule_name +from scripts.library.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/rules/raw.smk b/rules/raw.smk index fd95467..17d1e3b 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -1,12 +1,12 @@ -from scripts.util.patterns import ( +from scripts.library.patterns import ( get_pattern_tier_daq_unsorted, get_pattern_tier_daq, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, ) -from scripts.util.utils import set_last_rule_name -from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.library.utils import set_last_rule_name +from scripts.library.create_pars_keylist import ParsKeyResolve raw_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], diff --git a/rules/skm.smk b/rules/skm.smk index 3c620bf..404b81b 100644 --- a/rules/skm.smk +++ b/rules/skm.smk @@ -2,7 +2,7 @@ Snakemake rules for processing skm tier. 
""" -from scripts.util.patterns import ( +from scripts.library.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars, diff --git a/rules/tcm.smk b/rules/tcm.smk index e3a3410..941455d 100644 --- a/rules/tcm.smk +++ b/rules/tcm.smk @@ -2,7 +2,7 @@ Snakemake file containing the rules for generating the tcm """ -from scripts.util.patterns import ( +from scripts.library.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars_tmp_channel, diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 603124d..aee335c 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -9,7 +9,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 -from util.log import build_log +from library.log import build_log def replace_list_with_array(dic): diff --git a/scripts/build_evt.py b/scripts/build_evt.py index e56912b..6ef1e0f 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -8,8 +8,8 @@ from dbetto import Props, TextDB from legendmeta import LegendMetadata from lgdo.types import Array +from library.log import build_log from pygama.evt import build_evt -from util.log import build_log sto = lh5.LH5Store() diff --git a/scripts/build_hit.py b/scripts/build_hit.py index cd48f7c..776bd59 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -5,8 +5,8 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 +from library.log import build_log from pygama.hit.build_hit import build_hit -from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 0b036dd..961b86a 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -18,7 +18,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 -from util.log import build_log +from library.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) diff --git a/scripts/build_raw_fcio.py b/scripts/build_raw_fcio.py index b4d2e22..ddc765c 100644 --- a/scripts/build_raw_fcio.py +++ b/scripts/build_raw_fcio.py @@ -6,7 +6,7 @@ from daq2lh5 import build_raw from dbetto import TextDB from dbetto.catalog import Props -from util.log import build_log +from library.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/scripts/build_raw_orca.py b/scripts/build_raw_orca.py index 711ecdd..0f5bbcb 100644 --- a/scripts/build_raw_orca.py +++ b/scripts/build_raw_orca.py @@ -6,7 +6,7 @@ from daq2lh5 import build_raw from dbetto import TextDB from dbetto.catalog import Props -from util.log import build_log +from library.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/scripts/build_skm.py b/scripts/build_skm.py index cfd52e0..f674e5d 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -5,7 +5,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from lgdo.types import Array, Struct, Table, VectorOfVectors -from util.log import build_log +from library.log import build_log def get_all_out_fields(input_table, out_fields, current_field=""): diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index 156e4c8..c16b3c4 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ 
-6,8 +6,8 @@ from daq2lh5.orca import orca_flashcam from legendmeta import TextDB from legendmeta.catalog import Props +from library.log import build_log from pygama.evt.build_tcm import build_tcm -from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index f7b8dac..0b66c93 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -17,9 +17,9 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 +from library.log import build_log from pygama.math.histogram import get_hist from pygama.pargen.energy_cal import get_i_local_maxima -from util.log import build_log mpl.use("Agg") diff --git a/scripts/complete_run.py b/scripts/complete_run.py index c462367..7ffd73a 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -7,9 +7,9 @@ import time from pathlib import Path -import util.utils as ut -from util import patterns -from util.FileKey import FileKey +import library.utils as ut +from library import patterns +from library.FileKey import FileKey print("INFO: dataflow ran successfully, now few final checks and scripts") diff --git a/scripts/util/FileKey.py b/scripts/library/FileKey.py similarity index 100% rename from scripts/util/FileKey.py rename to scripts/library/FileKey.py diff --git a/scripts/util/__init__.py b/scripts/library/__init__.py similarity index 100% rename from scripts/util/__init__.py rename to scripts/library/__init__.py diff --git a/scripts/util/cal_grouping.py b/scripts/library/cal_grouping.py similarity index 100% rename from scripts/util/cal_grouping.py rename to scripts/library/cal_grouping.py diff --git a/scripts/util/catalog.py b/scripts/library/catalog.py similarity index 100% rename from scripts/util/catalog.py rename to scripts/library/catalog.py diff --git a/scripts/util/convert_np.py b/scripts/library/convert_np.py similarity index 100% rename from scripts/util/convert_np.py rename to scripts/library/convert_np.py diff --git a/scripts/util/create_pars_keylist.py b/scripts/library/create_pars_keylist.py similarity index 100% rename from scripts/util/create_pars_keylist.py rename to scripts/library/create_pars_keylist.py diff --git a/scripts/util/log.py b/scripts/library/log.py similarity index 100% rename from scripts/util/log.py rename to scripts/library/log.py diff --git a/scripts/util/pars_loading.py b/scripts/library/pars_loading.py similarity index 100% rename from scripts/util/pars_loading.py rename to scripts/library/pars_loading.py diff --git a/scripts/util/patterns.py b/scripts/library/patterns.py similarity index 100% rename from scripts/util/patterns.py rename to scripts/library/patterns.py diff --git a/scripts/util/utils.py b/scripts/library/utils.py similarity index 100% rename from scripts/util/utils.py rename to scripts/library/utils.py diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index bed04d2..6a99062 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -7,7 +7,7 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo import lh5 -from util.FileKey import ChannelProcKey +from library.FileKey import ChannelProcKey def replace_path(d, old_path, new_path): diff --git a/scripts/par_psp.py b/scripts/par_psp.py index a7dfbb2..5ae65c9 100644 --- a/scripts/par_psp.py +++ b/scripts/par_psp.py @@ -9,7 +9,7 @@ import numpy as np from legendmeta import LegendMetadata 
from legendmeta.catalog import Props -from util.FileKey import ChannelProcKey +from library.FileKey import ChannelProcKey mpl.use("Agg") diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index b9174ec..3a01d1c 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -5,8 +5,8 @@ from legendmeta import TextDB from legendmeta.catalog import Props from lgdo import lh5 +from library.log import build_log from sklearn.svm import SVC -from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 3e99228..dc38ad3 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -9,8 +9,8 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import Array, Table +from library.log import build_log from pygama.pargen.dplms_ge_dict import dplms_ge_dict -from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index c95842d..14f1b05 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -11,13 +11,13 @@ from dspeed.units import unit_registry as ureg from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.log import build_log from pygama.math.distributions import hpge_peak from pygama.pargen.dsp_optimize import ( BayesianOptimizer, run_bayesian_optimisation, run_one_dsp, ) -from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index d5a924c..64964c2 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -12,9 +12,9 @@ import pygama.pargen.energy_cal as pgc from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.log import build_log from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp -from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 766159c..f1f0f5c 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -8,9 +8,9 @@ import pygama.pargen.noise_optimization as pno from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.log import build_log from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp -from util.log import build_log sto = lh5.LH5Store() diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index b45a801..1ac3451 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -6,10 +6,10 @@ import numpy as np from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.log import build_log from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.extract_tau import ExtractTau -from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) 
diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index c61322c..bfc681f 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -10,12 +10,12 @@ import pandas as pd from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.convert_np import convert_dict_np_to_float +from library.log import build_log from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data -from util.convert_np import convert_dict_np_to_float -from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index b8ba61a..87a6afd 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -16,14 +16,14 @@ import pygama.math.histogram as pgh from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.convert_np import convert_dict_np_to_float +from library.log import build_log from matplotlib.colors import LogNorm from pygama.math.distributions import nb_poly from pygama.pargen.data_cleaning import get_mode_stdev, get_tcm_pulser_ids from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from scipy.stats import binned_statistic -from util.convert_np import convert_dict_np_to_float -from util.log import build_log log = logging.getLogger(__name__) mpl.use("agg") diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 48811ad..db721af 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -9,14 +9,14 @@ import pandas as pd from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.convert_np import convert_dict_np_to_float +from library.log import build_log from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data -from util.convert_np import convert_dict_np_to_float -from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index d68aaeb..97a2720 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -12,14 +12,14 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls +from library.convert_np import convert_dict_np_to_float +from library.log import build_log from pygama.pargen.data_cleaning import ( generate_cut_classifiers, get_keys, get_tcm_pulser_ids, ) from pygama.pargen.utils import load_data -from util.convert_np import convert_dict_np_to_float -from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index 0591f53..bd6d484 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -13,12 +13,12 @@ import pandas as pd from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.FileKey import ChannelProcKey, ProcessingFileKey +from library.log import build_log from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import 
get_tcm_pulser_ids from pygama.pargen.utils import load_data -from util.FileKey import ChannelProcKey, ProcessingFileKey -from util.log import build_log log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index f916ad3..1db32ad 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -10,13 +10,13 @@ import pandas as pd from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.FileKey import ChannelProcKey, ProcessingFileKey +from library.log import build_log from pars_pht_aoecal import run_aoe_calibration from pars_pht_lqcal import run_lq_calibration from pars_pht_partcal import calibrate_partition from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data -from util.FileKey import ChannelProcKey, ProcessingFileKey -from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 7185ab1..9e2f29a 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -11,14 +11,14 @@ import pandas as pd from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.FileKey import ChannelProcKey, ProcessingFileKey +from library.log import build_log from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data -from util.FileKey import ChannelProcKey, ProcessingFileKey -from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 228107e..4915494 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -13,12 +13,12 @@ import pygama.math.histogram as pgh from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.FileKey import ChannelProcKey, ProcessingFileKey +from library.log import build_log from pygama.math.distributions import nb_poly from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data -from util.FileKey import ChannelProcKey, ProcessingFileKey -from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index e79014f..02afade 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -12,14 +12,14 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls +from library.convert_np import convert_dict_np_to_float +from library.log import build_log from pygama.pargen.data_cleaning import ( generate_cut_classifiers, get_keys, get_tcm_pulser_ids, ) from pygama.pargen.utils import load_data -from util.convert_np import convert_dict_np_to_float -from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 628a104..86328dc 100644 --- a/scripts/pars_pht_qc_phy.py +++ 
b/scripts/pars_pht_qc_phy.py @@ -13,12 +13,12 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls +from library.convert_np import convert_dict_np_to_float +from library.log import build_log from pygama.pargen.data_cleaning import ( generate_cut_classifiers, get_keys, ) -from util.convert_np import convert_dict_np_to_float -from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index c48338a..f230ad0 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -6,8 +6,8 @@ import numpy as np from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.log import build_log from pygama.pargen.data_cleaning import get_tcm_pulser_ids -from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) diff --git a/tests/test_util.py b/tests/test_util.py index 010c749..acaf609 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,7 +1,7 @@ import json from pathlib import Path -from scripts.util import ( +from scripts.library import ( CalibCatalog, FileKey, pars_catalog, @@ -9,8 +9,8 @@ subst_vars, unix_time, ) -from scripts.util.patterns import get_pattern_tier_daq, get_pattern_tier_dsp -from scripts.util.utils import ( +from scripts.library.patterns import get_pattern_tier_daq, get_pattern_tier_dsp +from scripts.library.utils import ( par_dsp_path, par_overwrite_path, tier_dsp_path, From 0bb23dfbfa32444cdd987a96bc41663d1d018626 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 22 Jan 2025 00:10:29 +0100 Subject: [PATCH 049/101] Replace scripts.library.catalog with dbetto.catalog --- rules/common.smk | 2 +- rules/filelist_gen.smk | 24 ++-- scripts/blinding_calibration.py | 2 +- scripts/build_dsp.py | 5 +- scripts/build_hit.py | 2 +- scripts/build_raw_blind.py | 2 +- scripts/build_skm.py | 4 +- scripts/build_tcm.py | 4 +- scripts/check_blinding.py | 5 +- scripts/create_chankeylist.py | 3 +- scripts/library/catalog.py | 191 ---------------------------- scripts/library/pars_loading.py | 3 +- scripts/merge_channels.py | 2 +- scripts/par_psp.py | 2 +- scripts/pars_dsp_build_svm.py | 4 +- scripts/pars_dsp_dplms.py | 5 +- scripts/pars_dsp_eopt.py | 5 +- scripts/pars_dsp_event_selection.py | 5 +- scripts/pars_dsp_nopt.py | 5 +- scripts/pars_dsp_svm.py | 2 +- scripts/pars_dsp_tau.py | 5 +- scripts/pars_hit_aoe.py | 5 +- scripts/pars_hit_ecal.py | 5 +- scripts/pars_hit_lq.py | 5 +- scripts/pars_hit_qc.py | 5 +- scripts/pars_pht_aoecal.py | 5 +- scripts/pars_pht_fast.py | 5 +- scripts/pars_pht_lqcal.py | 5 +- scripts/pars_pht_partcal.py | 5 +- scripts/pars_pht_qc.py | 5 +- scripts/pars_pht_qc_phy.py | 5 +- scripts/pars_tcm_pulser.py | 5 +- 32 files changed, 86 insertions(+), 251 deletions(-) delete mode 100644 scripts/library/catalog.py diff --git a/rules/common.smk b/rules/common.smk index 2f8a82f..4f99d5c 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -11,7 +11,7 @@ from scripts.library.patterns import ( get_pars_path, ) from scripts.library import ProcessingFileKey -from scripts.library.catalog import Catalog +from dbetto.catalog import Catalog from scripts.library import utils diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 5e1857f..5d1f928 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -22,10 +22,13 @@ def expand_runs(in_dict): "p01": "r001..r005" } """ - for per, 
run_list in in_dict.items(): - if isinstance(run_list, str) and ".." in runs: - start, end = runs.split("..") - in_dict[per] = [f"r{x:03}" for x in range(int(start[1:]), int(end[1:]) + 1)] + for per, datalist in in_dict.items(): + for datatype, run_list in datalist.items(): + if isinstance(run_list, str) and ".." in runs: + start, end = runs.split("..") + in_dict[per][datatype] = [ + f"r{x:03}" for x in range(int(start[1:]), int(end[1:]) + 1) + ] return in_dict @@ -200,14 +203,17 @@ def build_filelist( other_filenames += filename else: if ( - _key.period - in analysis_runs # check if period in analysis_runs dicts + _key.datatype in analysis_runs + and _key.period + in analysis_runs[ + _key.datatype + ] # check if period in analysis_runs dicts and ( _key.run - in analysis_runs[ - _key.period + in analysis_runs[_key.period][ + _key.datatype ] # check if run in analysis_runs dicts - or analysis_runs[_key.period] + or analysis_runs[_key.period][_key.datatype] == "all" # or if runs is just specified as "all" ) ): diff --git a/scripts/blinding_calibration.py b/scripts/blinding_calibration.py index 072e756..4a666cc 100644 --- a/scripts/blinding_calibration.py +++ b/scripts/blinding_calibration.py @@ -12,8 +12,8 @@ import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np +from dbetto.catalog import Props from legendmeta import LegendMetadata -from legendmeta.catalog import Props from lgdo import lh5 from pygama.pargen.energy_cal import HPGeCalibration diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index aee335c..6f97406 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -5,9 +5,10 @@ from pathlib import Path import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props from dspeed import build_dsp -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from legendmeta import LegendMetadata from lgdo import lh5 from library.log import build_log diff --git a/scripts/build_hit.py b/scripts/build_hit.py index 776bd59..6310521 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -2,8 +2,8 @@ import time from pathlib import Path +from dbetto.catalog import Props from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props from lgdo import lh5 from library.log import build_log from pygama.hit.build_hit import build_hit diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 961b86a..e343bde 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -15,8 +15,8 @@ import numexpr as ne import numpy as np +from dbetto.catalog import Props from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props from lgdo import lh5 from library.log import build_log diff --git a/scripts/build_skm.py b/scripts/build_skm.py index f674e5d..aefc31b 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -1,8 +1,8 @@ import argparse import awkward as ak -from legendmeta import TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props from lgdo import lh5 from lgdo.types import Array, Struct, Table, VectorOfVectors from library.log import build_log diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index c16b3c4..2718c00 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -4,8 +4,8 @@ import lgdo.lh5 as lh5 import numpy as np from daq2lh5.orca import orca_flashcam -from legendmeta import TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from 
dbetto.catalog import Props from library.log import build_log from pygama.evt.build_tcm import build_tcm diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index 0b66c93..f5dd378 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -14,8 +14,9 @@ import matplotlib.pyplot as plt import numexpr as ne import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from lgdo import lh5 from library.log import build_log from pygama.math.histogram import get_hist diff --git a/scripts/create_chankeylist.py b/scripts/create_chankeylist.py index f01c879..a75be8b 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -1,7 +1,8 @@ import argparse from pathlib import Path -from legendmeta import LegendMetadata, TextDB +from dbetto import TextDB +from legendmeta import LegendMetadata argparser = argparse.ArgumentParser() argparser.add_argument("--det_status", help="det_status", type=str, required=True) diff --git a/scripts/library/catalog.py b/scripts/library/catalog.py deleted file mode 100644 index 739e21a..0000000 --- a/scripts/library/catalog.py +++ /dev/null @@ -1,191 +0,0 @@ -# -# Copyright (C) 2015 Oliver Schulz -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -""" -This module stores the scripts for leading validity files based on timestamp and system -""" - -import bisect -import collections -import copy -import json -import types -from collections import namedtuple -from pathlib import Path - -import yaml - -from .utils import unix_time - - -class Props: - @staticmethod - def read_from(sources): - def read_impl(sources): - if isinstance(sources, (str, Path)): - file_name = sources - if isinstance(file_name, str): - file_name = Path(file_name) - if file_name.suffix in (".yaml", ".yml"): - with file_name.open() as file: - return yaml.safe_load(file) - elif file_name.suffix == ".json": - with file_name.open() as file: - return json.load(file) - else: - msg = f"Can't run Props.read_from on file with suffix {file_name.suffix}" - raise ValueError(msg) - elif isinstance(sources, list): - result = {} - for p in map(read_impl, sources): - Props.add_to(result, p) - return result - else: - msg = f"Can't run Props.read_from on sources-value of type {type(sources)}" - raise ValueError(msg) - - return read_impl(sources) - - @staticmethod - def add_to(props_a, props_b): - a = props_a - b = props_b - - for key in b: - if key in a: - if isinstance(a[key], dict) and isinstance(b[key], dict): - Props.add_to(a[key], b[key]) - elif a[key] != b[key]: - a[key] = copy.copy(b[key]) - else: - a[key] = copy.copy(b[key]) - - -class PropsStream: - """Simple class to control loading of validity.yaml files""" - - @staticmethod - def get(value): - if isinstance(value, (str, Path)): - return PropsStream.read_from(value) - - if isinstance(value, (collections.abc.Sequence, types.GeneratorType)): - return value - - msg = f"Can't get PropsStream from value of type {type(value)}" - raise ValueError(msg) - - @staticmethod - def read_from(file_name): - with Path(file_name).open() as r: - file = yaml.safe_load(r) - file = sorted(file, key=lambda item: unix_time(item["valid_from"])) - yield from file - - -class Catalog(namedtuple("Catalog", ["entries"])): - """Implementation of the `YAML metadata validity specification `_.""" - - __slots__ = () - - class Entry(namedtuple("Entry", ["valid_from", "file"])): - __slots__ = () - - @staticmethod - def get(value): - if isinstance(value, Catalog): - return value - - if isinstance(value, str): - return Catalog.read_from(value) - - msg = f"Can't get Catalog from value of type {type(value)}" - raise ValueError(msg) - - @staticmethod - def read_from(file_name): - """Read from a valdiity YAML file and build a Catalog object""" - entries = {} - for props in PropsStream.get(file_name): - timestamp = props["valid_from"] - system = "all" if props.get("category") is None else props["category"] - file_key = props["apply"] - if system not in entries: - entries[system] = [] - mode = "append" if props.get("mode") is None else props["mode"] - mode = "reset" if len(entries[system]) == 0 else mode - if mode == "reset": - new = file_key - elif mode == "append": - new = entries[system][-1].file.copy() + file_key - elif mode == "remove": - new = entries[system][-1].file.copy() - for file in file_key: - new.remove(file) - elif mode == "replace": - new = entries[system][-1].file.copy() - if len(file_key) != 2: - msg = f"Invalid number of elements in replace mode: {len(file_key)}" - raise ValueError(msg) - new.remove(file_key[0]) - new += [file_key[1]] - - else: - msg = f"Unknown mode for {timestamp}" - raise ValueError(msg) - - if timestamp in [entry.valid_from for entry in entries[system]]: - msg = ( - f"Duplicate timestamp: {timestamp}, use reset mode 
instead with a single entry" - ) - raise ValueError(msg) - entries[system].append(Catalog.Entry(unix_time(timestamp), new)) - - for system, system_dict in entries.items(): - entries[system] = sorted(system_dict, key=lambda entry: entry.valid_from) - return Catalog(entries) - - def valid_for(self, timestamp, system="all", allow_none=False): - """Get the valid entries for a given timestamp and system""" - if system in self.entries: - valid_from = [entry.valid_from for entry in self.entries[system]] - pos = bisect.bisect_right(valid_from, unix_time(timestamp)) - if pos > 0: - return self.entries[system][pos - 1].file - - if system != "all": - return self.valid_for(timestamp, system="all", allow_none=allow_none) - - if allow_none: - return None - - msg = f"No valid entries found for timestamp: {timestamp}, system: {system}" - raise RuntimeError(msg) - - if system != "all": - return self.valid_for(timestamp, system="all", allow_none=allow_none) - - if allow_none: - return None - - msg = f"No entries found for system: {system}" - raise RuntimeError(msg) - - @staticmethod - def get_files(catalog_file, timestamp, category="all"): - """Helper function to get the files for a given timestamp and category""" - catalog = Catalog.read_from(catalog_file) - return Catalog.valid_for(catalog, timestamp, category) diff --git a/scripts/library/pars_loading.py b/scripts/library/pars_loading.py index 137ae03..80f54a6 100644 --- a/scripts/library/pars_loading.py +++ b/scripts/library/pars_loading.py @@ -5,7 +5,8 @@ from pathlib import Path -from .catalog import Catalog +from dbetto.catalog import Catalog + from .FileKey import ProcessingFileKey # from .patterns import diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index 6a99062..209708d 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -4,8 +4,8 @@ from pathlib import Path import numpy as np +from dbetto.catalog import Props from legendmeta import LegendMetadata -from legendmeta.catalog import Props from lgdo import lh5 from library.FileKey import ChannelProcKey diff --git a/scripts/par_psp.py b/scripts/par_psp.py index 5ae65c9..d996f3c 100644 --- a/scripts/par_psp.py +++ b/scripts/par_psp.py @@ -7,8 +7,8 @@ import matplotlib.dates as mdates import matplotlib.pyplot as plt import numpy as np +from dbetto.catalog import Props from legendmeta import LegendMetadata -from legendmeta.catalog import Props from library.FileKey import ChannelProcKey mpl.use("Agg") diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index 3a01d1c..7a0ecc9 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -2,8 +2,8 @@ import pickle as pkl from pathlib import Path -from legendmeta import TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props from lgdo import lh5 from library.log import build_log from sklearn.svm import SVC diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index dc38ad3..457bda1 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -6,8 +6,9 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from lgdo import Array, Table from library.log import build_log from pygama.pargen.dplms_ge_dict import dplms_ge_dict diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 14f1b05..a957c66 100644 --- 
a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -8,9 +8,10 @@ import numpy as np import pygama.pargen.energy_optimisation as om # noqa: F401 import sklearn.gaussian_process.kernels as ker +from dbetto import TextDB +from dbetto.catalog import Props from dspeed.units import unit_registry as ureg -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from legendmeta import LegendMetadata from library.log import build_log from pygama.math.distributions import hpge_peak from pygama.pargen.dsp_optimize import ( diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 64964c2..177eba6 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -10,8 +10,9 @@ import numpy as np import pygama.math.histogram as pgh import pygama.pargen.energy_cal as pgc -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.log import build_log from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index f1f0f5c..53188ba 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -6,8 +6,9 @@ import lgdo.lh5 as lh5 import numpy as np import pygama.pargen.noise_optimization as pno -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.log import build_log from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp diff --git a/scripts/pars_dsp_svm.py b/scripts/pars_dsp_svm.py index 359bc3f..67d8a64 100644 --- a/scripts/pars_dsp_svm.py +++ b/scripts/pars_dsp_svm.py @@ -1,7 +1,7 @@ import argparse from pathlib import Path -from legendmeta.catalog import Props +from dbetto.catalog import Props argparser = argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index 1ac3451..9a38526 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -4,8 +4,9 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.log import build_log from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index bfc681f..575d3de 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -8,8 +8,9 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.convert_np import convert_dict_np_to_float from library.log import build_log from pygama.pargen.AoE_cal import * # noqa: F403 diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 87a6afd..488463c 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -14,8 +14,9 @@ import numpy as np import pygama.math.distributions as pgf import 
pygama.math.histogram as pgh -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.convert_np import convert_dict_np_to_float from library.log import build_log from matplotlib.colors import LogNorm diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index db721af..4a75a06 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -7,8 +7,9 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.convert_np import convert_dict_np_to_float from library.log import build_log from pygama.math.distributions import gaussian diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 97a2720..460e858 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -9,8 +9,9 @@ from pathlib import Path import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from lgdo.lh5 import ls from library.convert_np import convert_dict_np_to_float from library.log import build_log diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index bd6d484..f46fb7b 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -11,8 +11,9 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.FileKey import ChannelProcKey, ProcessingFileKey from library.log import build_log from pygama.pargen.AoE_cal import * # noqa: F403 diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 1db32ad..cf90b94 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -8,8 +8,9 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.FileKey import ChannelProcKey, ProcessingFileKey from library.log import build_log from pars_pht_aoecal import run_aoe_calibration diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 9e2f29a..d470480 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -9,8 +9,9 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.FileKey import ChannelProcKey, ProcessingFileKey from library.log import build_log from pygama.math.distributions import gaussian diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 4915494..b726b96 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -11,8 +11,9 @@ import pandas as pd import pygama.math.distributions as pgf import pygama.math.histogram as pgh -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.FileKey import ChannelProcKey, ProcessingFileKey from 
library.log import build_log from pygama.math.distributions import nb_poly diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 02afade..e3fbd12 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -9,8 +9,9 @@ from pathlib import Path import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from lgdo.lh5 import ls from library.convert_np import convert_dict_np_to_float from library.log import build_log diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 86328dc..c235064 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -10,8 +10,9 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from lgdo.lh5 import ls from library.convert_np import convert_dict_np_to_float from library.log import build_log diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index f230ad0..b7618d1 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -4,8 +4,9 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.log import build_log from pygama.pargen.data_cleaning import get_tcm_pulser_ids From 85a2d9dd7cd1e7a2e0cb64d3f106265b5f6a557a Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 28 Jan 2025 12:33:43 +0100 Subject: [PATCH 050/101] add threshold extraction --- scripts/check_blinding.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index f5dd378..a81a1a3 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -87,7 +87,10 @@ # will always pass this check if (np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5)) or det_status is False: Path(args.output).mkdir(parents=True, exist_ok=True) - Props.write_to(args.output, {}) + Props.write_to( + args.output, + {"threshold_adc": np.nanmin(daqenergy), "threshold_kev": np.nanmin(daqenergy_cal)}, + ) else: msg = "peaks not found in daqenergy" raise RuntimeError(msg) From ef8996691fecf3d7b7e7d4ca282687b1258f5de7 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 28 Jan 2025 12:45:02 +0100 Subject: [PATCH 051/101] new structure --- config-lngs.yaml | 71 +++++++++++++++ config-nersc.yaml | 69 ++++++++++++++ templates/config-nersc.json | 87 ------------------ templates/config.json | 89 ------------------- Snakefile => workflow/Snakefile | 1 + .../Snakefile-build-raw | 0 .../profiles}/build-raw/config.yaml | 0 .../profiles}/default/config.yaml | 0 .../profiles}/legend-data/config.yaml | 0 {rules => workflow/rules}/ann.smk | 0 .../rules}/blinding_calibration.smk | 0 {rules => workflow/rules}/blinding_check.smk | 0 {rules => workflow/rules}/chanlist_gen.smk | 0 {rules => workflow/rules}/common.smk | 0 {rules => workflow/rules}/dsp.smk | 0 {rules => workflow/rules}/evt.smk | 0 {rules => workflow/rules}/filelist_gen.smk | 0 {rules => workflow/rules}/hit.smk | 0 {rules => workflow/rules}/main.smk | 0 {rules => workflow/rules}/pht.smk | 0 {rules => workflow/rules}/pht_fast.smk | 0 {rules => workflow/rules}/psp.smk | 0 {rules => workflow/rules}/qc_phy.smk 
| 0 {rules => workflow/rules}/raw.smk | 0 {rules => workflow/rules}/skm.smk | 0 {rules => workflow/rules}/tcm.smk | 0 {scripts => workflow/scripts}/__init__.py | 0 .../scripts}/blinding_calibration.py | 0 {scripts => workflow/scripts}/build_dsp.py | 0 {scripts => workflow/scripts}/build_evt.py | 0 {scripts => workflow/scripts}/build_fdb.py | 0 {scripts => workflow/scripts}/build_hit.py | 0 .../scripts}/build_raw_blind.py | 0 .../scripts}/build_raw_fcio.py | 0 .../scripts}/build_raw_orca.py | 0 {scripts => workflow/scripts}/build_skm.py | 0 {scripts => workflow/scripts}/build_tcm.py | 0 .../scripts}/check_blinding.py | 0 {scripts => workflow/scripts}/complete_run.py | 0 .../scripts}/create_chankeylist.py | 0 .../scripts}/library/FileKey.py | 0 .../scripts}/library/__init__.py | 0 .../scripts}/library/cal_grouping.py | 0 .../scripts}/library/convert_np.py | 0 .../scripts}/library/create_pars_keylist.py | 0 {scripts => workflow/scripts}/library/log.py | 0 .../scripts}/library/pars_loading.py | 0 .../scripts}/library/patterns.py | 0 .../scripts}/library/utils.py | 0 .../scripts}/merge_channels.py | 0 {scripts => workflow/scripts}/par_psp.py | 0 .../scripts}/pars_dsp_build_svm.py | 0 .../scripts}/pars_dsp_dplms.py | 0 .../scripts}/pars_dsp_eopt.py | 0 .../scripts}/pars_dsp_event_selection.py | 0 .../scripts}/pars_dsp_nopt.py | 0 {scripts => workflow/scripts}/pars_dsp_svm.py | 0 {scripts => workflow/scripts}/pars_dsp_tau.py | 0 {scripts => workflow/scripts}/pars_hit_aoe.py | 0 .../scripts}/pars_hit_ecal.py | 0 {scripts => workflow/scripts}/pars_hit_lq.py | 0 {scripts => workflow/scripts}/pars_hit_qc.py | 0 .../scripts}/pars_pht_aoecal.py | 0 .../scripts}/pars_pht_fast.py | 0 .../scripts}/pars_pht_lqcal.py | 0 .../scripts}/pars_pht_partcal.py | 0 {scripts => workflow/scripts}/pars_pht_qc.py | 0 .../scripts}/pars_pht_qc_phy.py | 0 .../scripts}/pars_tcm_pulser.py | 0 .../scripts}/write_filelist.py | 0 70 files changed, 141 insertions(+), 176 deletions(-) create mode 100644 config-lngs.yaml create mode 100644 config-nersc.yaml delete mode 100644 templates/config-nersc.json delete mode 100644 templates/config.json rename Snakefile => workflow/Snakefile (99%) rename Snakefile-build-raw => workflow/Snakefile-build-raw (100%) rename {profiles => workflow/profiles}/build-raw/config.yaml (100%) rename {profiles => workflow/profiles}/default/config.yaml (100%) rename {profiles => workflow/profiles}/legend-data/config.yaml (100%) rename {rules => workflow/rules}/ann.smk (100%) rename {rules => workflow/rules}/blinding_calibration.smk (100%) rename {rules => workflow/rules}/blinding_check.smk (100%) rename {rules => workflow/rules}/chanlist_gen.smk (100%) rename {rules => workflow/rules}/common.smk (100%) rename {rules => workflow/rules}/dsp.smk (100%) rename {rules => workflow/rules}/evt.smk (100%) rename {rules => workflow/rules}/filelist_gen.smk (100%) rename {rules => workflow/rules}/hit.smk (100%) rename {rules => workflow/rules}/main.smk (100%) rename {rules => workflow/rules}/pht.smk (100%) rename {rules => workflow/rules}/pht_fast.smk (100%) rename {rules => workflow/rules}/psp.smk (100%) rename {rules => workflow/rules}/qc_phy.smk (100%) rename {rules => workflow/rules}/raw.smk (100%) rename {rules => workflow/rules}/skm.smk (100%) rename {rules => workflow/rules}/tcm.smk (100%) rename {scripts => workflow/scripts}/__init__.py (100%) rename {scripts => workflow/scripts}/blinding_calibration.py (100%) rename {scripts => workflow/scripts}/build_dsp.py (100%) rename {scripts => 
workflow/scripts}/build_evt.py (100%) rename {scripts => workflow/scripts}/build_fdb.py (100%) rename {scripts => workflow/scripts}/build_hit.py (100%) rename {scripts => workflow/scripts}/build_raw_blind.py (100%) rename {scripts => workflow/scripts}/build_raw_fcio.py (100%) rename {scripts => workflow/scripts}/build_raw_orca.py (100%) rename {scripts => workflow/scripts}/build_skm.py (100%) rename {scripts => workflow/scripts}/build_tcm.py (100%) rename {scripts => workflow/scripts}/check_blinding.py (100%) rename {scripts => workflow/scripts}/complete_run.py (100%) rename {scripts => workflow/scripts}/create_chankeylist.py (100%) rename {scripts => workflow/scripts}/library/FileKey.py (100%) rename {scripts => workflow/scripts}/library/__init__.py (100%) rename {scripts => workflow/scripts}/library/cal_grouping.py (100%) rename {scripts => workflow/scripts}/library/convert_np.py (100%) rename {scripts => workflow/scripts}/library/create_pars_keylist.py (100%) rename {scripts => workflow/scripts}/library/log.py (100%) rename {scripts => workflow/scripts}/library/pars_loading.py (100%) rename {scripts => workflow/scripts}/library/patterns.py (100%) rename {scripts => workflow/scripts}/library/utils.py (100%) rename {scripts => workflow/scripts}/merge_channels.py (100%) rename {scripts => workflow/scripts}/par_psp.py (100%) rename {scripts => workflow/scripts}/pars_dsp_build_svm.py (100%) rename {scripts => workflow/scripts}/pars_dsp_dplms.py (100%) rename {scripts => workflow/scripts}/pars_dsp_eopt.py (100%) rename {scripts => workflow/scripts}/pars_dsp_event_selection.py (100%) rename {scripts => workflow/scripts}/pars_dsp_nopt.py (100%) rename {scripts => workflow/scripts}/pars_dsp_svm.py (100%) rename {scripts => workflow/scripts}/pars_dsp_tau.py (100%) rename {scripts => workflow/scripts}/pars_hit_aoe.py (100%) rename {scripts => workflow/scripts}/pars_hit_ecal.py (100%) rename {scripts => workflow/scripts}/pars_hit_lq.py (100%) rename {scripts => workflow/scripts}/pars_hit_qc.py (100%) rename {scripts => workflow/scripts}/pars_pht_aoecal.py (100%) rename {scripts => workflow/scripts}/pars_pht_fast.py (100%) rename {scripts => workflow/scripts}/pars_pht_lqcal.py (100%) rename {scripts => workflow/scripts}/pars_pht_partcal.py (100%) rename {scripts => workflow/scripts}/pars_pht_qc.py (100%) rename {scripts => workflow/scripts}/pars_pht_qc_phy.py (100%) rename {scripts => workflow/scripts}/pars_tcm_pulser.py (100%) rename {scripts => workflow/scripts}/write_filelist.py (100%) diff --git a/config-lngs.yaml b/config-lngs.yaml new file mode 100644 index 0000000..901cac8 --- /dev/null +++ b/config-lngs.yaml @@ -0,0 +1,71 @@ +setups: + l200: + paths: + sandbox_path: '' + tier_daq: $_/generated/tier/daq + tier_raw_blind: '' + workflow: $_/workflow + metadata: $_/inputs + config: $_/inputs/dataprod/config + par_overwrite: $_/inputs/dataprod/overrides + chan_map: $_/inputs/hardware/configuration + detector_db: $_/inputs/hardware/detectors + tier: $_/generated/tier + tier_raw: $_/generated/tier/raw + tier_tcm: $_/generated/tier/tcm + tier_dsp: $_/generated/tier/dsp + tier_hit: $_/generated/tier/hit + tier_ann: $_/generated/tier/ann + tier_evt: $_/generated/tier/evt + tier_psp: $_/generated/tier/psp + tier_pht: $_/generated/tier/pht + tier_pan: $_/generated/tier/pan + tier_pet: $_/generated/tier/pet + tier_skm: $_/generated/tier/skm + par: $_/generated/par + par_raw: $_/generated/par/raw + par_tcm: $_/generated/par/tcm + par_dsp: $_/generated/par/dsp + par_hit: $_/generated/par/hit + par_evt: 
$_/generated/par/evt + par_psp: $_/generated/par/psp + par_pht: $_/generated/par/pht + par_pet: $_/generated/par/pet + plt: $_/generated/plt + log: $_/generated/log + tmp_plt: $_/generated/tmp/plt + tmp_log: $_/generated/tmp/log + tmp_filelists: $_/generated/tmp/filelists + tmp_par: $_/generated/tmp/par + src: $_/software/python/src + install: $_/software/python/install + cache: $_/software/python/cache + table_format: + raw: ch{ch:07d}/raw + dsp: ch{ch:07d}/dsp + psp: ch{ch:07d}/dsp + hit: ch{ch:07d}/hit + pht: ch{ch:07d}/hit + evt: '{grp}/evt' + pet: '{grp}/evt' + skm: '{grp}/skm' + tcm: hardware_tcm_1 + execenv: + cmd: apptainer run + arg: /data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif + env: + HDF5_USE_FILE_LOCKING: 'False' + LGDO_BOUNDSCHECK: 'false' + DSPEED_BOUNDSCHECK: 'false' + PYGAMA_PARALLEL: 'false' + PYGAMA_FASTMATH: 'false' + pkg_versions: + pygama: pygama==2.0.3 + pylegendmeta: pylegendmeta==1.1.0 + dspeed: dspeed==1.6.1 + legend-pydataobj: legend-pydataobj==1.9.0 + legend-daq2lh5: legend-daq2lh5==1.2.2 + tensorflow: tensorflow==2.17 + keras: keras==3.6.0 + jax: jax==0.4.30 + meta_version: v0.5.7 diff --git a/config-nersc.yaml b/config-nersc.yaml new file mode 100644 index 0000000..88b5156 --- /dev/null +++ b/config-nersc.yaml @@ -0,0 +1,69 @@ +setups: + l200: + paths: + sandbox_path: '' + tier_daq: $_/generated/tier/daq + tier_raw_blind: '' + workflow: $_/workflow + metadata: $_/inputs + config: $_/inputs/dataprod/config + par_overwrite: $_/inputs/dataprod/overrides + chan_map: $_/inputs/hardware/configuration + detector_db: $_/inputs/hardware/detectors + tier: $_/generated/tier + tier_raw: /dvs_ro/cfs/cdirs/m2676/users/pertoldi/legend-prodenv/prod-blind/ref-raw/generated/tier/raw + tier_tcm: /dvs_ro/cfs/cdirs/m2676/users/pertoldi/legend-prodenv/prod-blind/ref-v2.0.0/generated/tier/tcm + tier_dsp: $_/generated/tier/dsp + tier_hit: $_/generated/tier/hit + tier_evt: $_/generated/tier/evt + tier_psp: $_/generated/tier/psp + tier_pht: $_/generated/tier/pht + tier_pet: $_/generated/tier/pet + tier_skm: $_/generated/tier/skm + par: $_/generated/par + par_raw: $_/generated/par/raw + par_tcm: $_/generated/par/tcm + par_dsp: $_/generated/par/dsp + par_hit: $_/generated/par/hit + par_evt: $_/generated/par/evt + par_psp: $_/generated/par/psp + par_pht: $_/generated/par/pht + par_pet: $_/generated/par/pet + plt: $_/generated/plt + log: $_/generated/log + tmp_plt: $_/generated/tmp/plt + tmp_log: $_/generated/tmp/log + tmp_filelists: $_/generated/tmp/filelists + tmp_par: $_/generated/tmp/par + src: $_/software/python/src + install: $_/software/python/install + cache: $_/software/python/cache + table_format: + raw: ch{ch:07d}/raw + dsp: ch{ch:07d}/dsp + psp: ch{ch:07d}/dsp + hit: ch{ch:07d}/hit + pht: ch{ch:07d}/hit + evt: '{grp}/evt' + pet: '{grp}/evt' + skm: '{grp}/skm' + tcm: hardware_tcm_1 + execenv: + cmd: shifter + arg: ' --image legendexp/legend-base:latest' + env: + HDF5_USE_FILE_LOCKING: 'FALSE' + LGDO_BOUNDSCHECK: 'false' + DSPEED_BOUNDSCHECK: 'false' + PYGAMA_PARALLEL: 'false' + PYGAMA_FASTMATH: 'false' + pkg_versions: + pygama: pygama==2.0.3 + pylegendmeta: pylegendmeta==0.10.2 + dspeed: dspeed==1.6.1 + legend-pydataobj: legend-pydataobj==1.10.0 + legend-daq2lh5: legend-daq2lh5==1.2.1 + tensorflow: tensorflow==2.17 + keras: keras==3.6.0 + jax: jax==0.4.30 + meta_version: v0.5.7 diff --git a/templates/config-nersc.json b/templates/config-nersc.json deleted file mode 100644 index 9df4fe7..0000000 --- a/templates/config-nersc.json +++ /dev/null 
@@ -1,87 +0,0 @@ -{ - "setups": { - "l200": { - "paths": { - "sandbox_path": "", - "tier_daq": "$_/generated/tier/daq", - "tier_raw_blind": "", - - "workflow": "$_/workflow", - - "metadata": "$_/inputs", - "config": "$_/inputs/dataprod/config", - "par_overwrite": "$_/inputs/dataprod/overrides", - "chan_map": "$_/inputs/hardware/configuration", - "detector_db": "$_/inputs/hardware/detectors", - - "tier": "$_/generated/tier", - "tier_raw": "/dvs_ro/cfs/cdirs/m2676/users/pertoldi/legend-prodenv/prod-blind/ref-raw/generated/tier/raw", - "tier_tcm": "/dvs_ro/cfs/cdirs/m2676/users/pertoldi/legend-prodenv/prod-blind/ref-v2.0.0/generated/tier/tcm", - "tier_dsp": "$_/generated/tier/dsp", - "tier_hit": "$_/generated/tier/hit", - "tier_evt": "$_/generated/tier/evt", - "tier_psp": "$_/generated/tier/psp", - "tier_pht": "$_/generated/tier/pht", - "tier_pet": "$_/generated/tier/pet", - "tier_skm": "$_/generated/tier/skm", - - "par": "$_/generated/par", - "par_raw": "$_/generated/par/raw", - "par_tcm": "$_/generated/par/tcm", - "par_dsp": "$_/generated/par/dsp", - "par_hit": "$_/generated/par/hit", - "par_evt": "$_/generated/par/evt", - "par_psp": "$_/generated/par/psp", - "par_pht": "$_/generated/par/pht", - "par_pet": "$_/generated/par/pet", - - "plt": "$_/generated/plt", - "log": "$_/generated/log", - - "tmp_plt": "$_/generated/tmp/plt", - "tmp_log": "$_/generated/tmp/log", - "tmp_filelists": "$_/generated/tmp/filelists", - "tmp_par": "$_/generated/tmp/par", - - "src": "$_/software/python/src", - "install": "$_/software/python/install", - "cache": "$_/software/python/cache" - }, - - "table_format": { - "raw": "ch{ch:07d}/raw", - "dsp": "ch{ch:07d}/dsp", - "psp": "ch{ch:07d}/dsp", - "hit": "ch{ch:07d}/hit", - "pht": "ch{ch:07d}/hit", - "evt": "{grp}/evt", - "pet": "{grp}/evt", - "skm": "{grp}/skm", - "tcm": "hardware_tcm_1" - }, - - "execenv": { - "cmd": "shifter", - "arg": " --image legendexp/legend-base:latest", - "env": { - "HDF5_USE_FILE_LOCKING": "FALSE", - "LGDO_BOUNDSCHECK": "false", - "DSPEED_BOUNDSCHECK": "false", - "PYGAMA_PARALLEL": "false", - "PYGAMA_FASTMATH": "false" - } - }, - "pkg_versions": { - "pygama": "pygama==2.0.3", - "pylegendmeta": "pylegendmeta==0.10.2", - "dbetto": "dbetto==1.0.6", - "dspeed": "dspeed==1.6.1", - "legend-pydataobj": "legend-pydataobj==1.10.0", - "legend-daq2lh5": "legend-daq2lh5==1.2.1", - "tensorflow": "tensorflow==2.17", - "keras": "keras==3.6.0", - "jax": "jax==0.4.30" - } - } - } -} diff --git a/templates/config.json b/templates/config.json deleted file mode 100644 index 17f4bbf..0000000 --- a/templates/config.json +++ /dev/null @@ -1,89 +0,0 @@ -{ - "setups": { - "l200": { - "paths": { - "sandbox_path": "", - "tier_daq": "$_/generated/tier/daq", - "tier_raw_blind": "", - - "workflow": "$_/workflow", - - "metadata": "$_/inputs", - "config": "$_/inputs/dataprod/config", - "par_overwrite": "$_/inputs/dataprod/overrides", - "chan_map": "$_/inputs/hardware/configuration", - "detector_db": "$_/inputs/hardware/detectors", - - "tier": "$_/generated/tier", - "tier_raw": "$_/generated/tier/raw", - "tier_tcm": "$_/generated/tier/tcm", - "tier_dsp": "$_/generated/tier/dsp", - "tier_hit": "$_/generated/tier/hit", - "tier_ann": "$_/generated/tier/ann", - "tier_evt": "$_/generated/tier/evt", - "tier_psp": "$_/generated/tier/psp", - "tier_pht": "$_/generated/tier/pht", - "tier_pan": "$_/generated/tier/pan", - "tier_pet": "$_/generated/tier/pet", - "tier_skm": "$_/generated/tier/skm", - - "par": "$_/generated/par", - "par_raw": "$_/generated/par/raw", - "par_tcm": 
"$_/generated/par/tcm", - "par_dsp": "$_/generated/par/dsp", - "par_hit": "$_/generated/par/hit", - "par_evt": "$_/generated/par/evt", - "par_psp": "$_/generated/par/psp", - "par_pht": "$_/generated/par/pht", - "par_pet": "$_/generated/par/pet", - - "plt": "$_/generated/plt", - "log": "$_/generated/log", - - "tmp_plt": "$_/generated/tmp/plt", - "tmp_log": "$_/generated/tmp/log", - "tmp_filelists": "$_/generated/tmp/filelists", - "tmp_par": "$_/generated/tmp/par", - - "src": "$_/software/python/src", - "install": "$_/software/python/install", - "cache": "$_/software/python/cache" - }, - - "table_format": { - "raw": "ch{ch:07d}/raw", - "dsp": "ch{ch:07d}/dsp", - "psp": "ch{ch:07d}/dsp", - "hit": "ch{ch:07d}/hit", - "pht": "ch{ch:07d}/hit", - "evt": "{grp}/evt", - "pet": "{grp}/evt", - "skm": "{grp}/skm", - "tcm": "hardware_tcm_1" - }, - - "execenv": { - "cmd": "apptainer run", - "arg": "/data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif", - "env": { - "HDF5_USE_FILE_LOCKING": "False", - "LGDO_BOUNDSCHECK": "false", - "DSPEED_BOUNDSCHECK": "false", - "PYGAMA_PARALLEL": "false", - "PYGAMA_FASTMATH": "false" - } - }, - "pkg_versions": { - "pygama": "pygama==2.0.3", - "pylegendmeta": "pylegendmeta==1.1.0", - "dbetto": "dbetto==1.0.6", - "dspeed": "dspeed==1.6.1", - "legend-pydataobj": "legend-pydataobj==1.9.0", - "legend-daq2lh5": "legend-daq2lh5==1.2.2", - "tensorflow": "tensorflow==2.17", - "keras": "keras==3.6.0", - "jax": "jax==0.4.30" - } - } - } -} diff --git a/Snakefile b/workflow/Snakefile similarity index 99% rename from Snakefile rename to workflow/Snakefile index 10a6855..3e25153 100644 --- a/Snakefile +++ b/workflow/Snakefile @@ -17,6 +17,7 @@ import glob from datetime import datetime from collections import OrderedDict import logging +from pylegendmeta import LegendMetadata import scripts.library as lib from scripts.library.pars_loading import ParsCatalog diff --git a/Snakefile-build-raw b/workflow/Snakefile-build-raw similarity index 100% rename from Snakefile-build-raw rename to workflow/Snakefile-build-raw diff --git a/profiles/build-raw/config.yaml b/workflow/profiles/build-raw/config.yaml similarity index 100% rename from profiles/build-raw/config.yaml rename to workflow/profiles/build-raw/config.yaml diff --git a/profiles/default/config.yaml b/workflow/profiles/default/config.yaml similarity index 100% rename from profiles/default/config.yaml rename to workflow/profiles/default/config.yaml diff --git a/profiles/legend-data/config.yaml b/workflow/profiles/legend-data/config.yaml similarity index 100% rename from profiles/legend-data/config.yaml rename to workflow/profiles/legend-data/config.yaml diff --git a/rules/ann.smk b/workflow/rules/ann.smk similarity index 100% rename from rules/ann.smk rename to workflow/rules/ann.smk diff --git a/rules/blinding_calibration.smk b/workflow/rules/blinding_calibration.smk similarity index 100% rename from rules/blinding_calibration.smk rename to workflow/rules/blinding_calibration.smk diff --git a/rules/blinding_check.smk b/workflow/rules/blinding_check.smk similarity index 100% rename from rules/blinding_check.smk rename to workflow/rules/blinding_check.smk diff --git a/rules/chanlist_gen.smk b/workflow/rules/chanlist_gen.smk similarity index 100% rename from rules/chanlist_gen.smk rename to workflow/rules/chanlist_gen.smk diff --git a/rules/common.smk b/workflow/rules/common.smk similarity index 100% rename from rules/common.smk rename to workflow/rules/common.smk diff --git a/rules/dsp.smk 
b/workflow/rules/dsp.smk similarity index 100% rename from rules/dsp.smk rename to workflow/rules/dsp.smk diff --git a/rules/evt.smk b/workflow/rules/evt.smk similarity index 100% rename from rules/evt.smk rename to workflow/rules/evt.smk diff --git a/rules/filelist_gen.smk b/workflow/rules/filelist_gen.smk similarity index 100% rename from rules/filelist_gen.smk rename to workflow/rules/filelist_gen.smk diff --git a/rules/hit.smk b/workflow/rules/hit.smk similarity index 100% rename from rules/hit.smk rename to workflow/rules/hit.smk diff --git a/rules/main.smk b/workflow/rules/main.smk similarity index 100% rename from rules/main.smk rename to workflow/rules/main.smk diff --git a/rules/pht.smk b/workflow/rules/pht.smk similarity index 100% rename from rules/pht.smk rename to workflow/rules/pht.smk diff --git a/rules/pht_fast.smk b/workflow/rules/pht_fast.smk similarity index 100% rename from rules/pht_fast.smk rename to workflow/rules/pht_fast.smk diff --git a/rules/psp.smk b/workflow/rules/psp.smk similarity index 100% rename from rules/psp.smk rename to workflow/rules/psp.smk diff --git a/rules/qc_phy.smk b/workflow/rules/qc_phy.smk similarity index 100% rename from rules/qc_phy.smk rename to workflow/rules/qc_phy.smk diff --git a/rules/raw.smk b/workflow/rules/raw.smk similarity index 100% rename from rules/raw.smk rename to workflow/rules/raw.smk diff --git a/rules/skm.smk b/workflow/rules/skm.smk similarity index 100% rename from rules/skm.smk rename to workflow/rules/skm.smk diff --git a/rules/tcm.smk b/workflow/rules/tcm.smk similarity index 100% rename from rules/tcm.smk rename to workflow/rules/tcm.smk diff --git a/scripts/__init__.py b/workflow/scripts/__init__.py similarity index 100% rename from scripts/__init__.py rename to workflow/scripts/__init__.py diff --git a/scripts/blinding_calibration.py b/workflow/scripts/blinding_calibration.py similarity index 100% rename from scripts/blinding_calibration.py rename to workflow/scripts/blinding_calibration.py diff --git a/scripts/build_dsp.py b/workflow/scripts/build_dsp.py similarity index 100% rename from scripts/build_dsp.py rename to workflow/scripts/build_dsp.py diff --git a/scripts/build_evt.py b/workflow/scripts/build_evt.py similarity index 100% rename from scripts/build_evt.py rename to workflow/scripts/build_evt.py diff --git a/scripts/build_fdb.py b/workflow/scripts/build_fdb.py similarity index 100% rename from scripts/build_fdb.py rename to workflow/scripts/build_fdb.py diff --git a/scripts/build_hit.py b/workflow/scripts/build_hit.py similarity index 100% rename from scripts/build_hit.py rename to workflow/scripts/build_hit.py diff --git a/scripts/build_raw_blind.py b/workflow/scripts/build_raw_blind.py similarity index 100% rename from scripts/build_raw_blind.py rename to workflow/scripts/build_raw_blind.py diff --git a/scripts/build_raw_fcio.py b/workflow/scripts/build_raw_fcio.py similarity index 100% rename from scripts/build_raw_fcio.py rename to workflow/scripts/build_raw_fcio.py diff --git a/scripts/build_raw_orca.py b/workflow/scripts/build_raw_orca.py similarity index 100% rename from scripts/build_raw_orca.py rename to workflow/scripts/build_raw_orca.py diff --git a/scripts/build_skm.py b/workflow/scripts/build_skm.py similarity index 100% rename from scripts/build_skm.py rename to workflow/scripts/build_skm.py diff --git a/scripts/build_tcm.py b/workflow/scripts/build_tcm.py similarity index 100% rename from scripts/build_tcm.py rename to workflow/scripts/build_tcm.py diff --git a/scripts/check_blinding.py 
b/workflow/scripts/check_blinding.py similarity index 100% rename from scripts/check_blinding.py rename to workflow/scripts/check_blinding.py diff --git a/scripts/complete_run.py b/workflow/scripts/complete_run.py similarity index 100% rename from scripts/complete_run.py rename to workflow/scripts/complete_run.py diff --git a/scripts/create_chankeylist.py b/workflow/scripts/create_chankeylist.py similarity index 100% rename from scripts/create_chankeylist.py rename to workflow/scripts/create_chankeylist.py diff --git a/scripts/library/FileKey.py b/workflow/scripts/library/FileKey.py similarity index 100% rename from scripts/library/FileKey.py rename to workflow/scripts/library/FileKey.py diff --git a/scripts/library/__init__.py b/workflow/scripts/library/__init__.py similarity index 100% rename from scripts/library/__init__.py rename to workflow/scripts/library/__init__.py diff --git a/scripts/library/cal_grouping.py b/workflow/scripts/library/cal_grouping.py similarity index 100% rename from scripts/library/cal_grouping.py rename to workflow/scripts/library/cal_grouping.py diff --git a/scripts/library/convert_np.py b/workflow/scripts/library/convert_np.py similarity index 100% rename from scripts/library/convert_np.py rename to workflow/scripts/library/convert_np.py diff --git a/scripts/library/create_pars_keylist.py b/workflow/scripts/library/create_pars_keylist.py similarity index 100% rename from scripts/library/create_pars_keylist.py rename to workflow/scripts/library/create_pars_keylist.py diff --git a/scripts/library/log.py b/workflow/scripts/library/log.py similarity index 100% rename from scripts/library/log.py rename to workflow/scripts/library/log.py diff --git a/scripts/library/pars_loading.py b/workflow/scripts/library/pars_loading.py similarity index 100% rename from scripts/library/pars_loading.py rename to workflow/scripts/library/pars_loading.py diff --git a/scripts/library/patterns.py b/workflow/scripts/library/patterns.py similarity index 100% rename from scripts/library/patterns.py rename to workflow/scripts/library/patterns.py diff --git a/scripts/library/utils.py b/workflow/scripts/library/utils.py similarity index 100% rename from scripts/library/utils.py rename to workflow/scripts/library/utils.py diff --git a/scripts/merge_channels.py b/workflow/scripts/merge_channels.py similarity index 100% rename from scripts/merge_channels.py rename to workflow/scripts/merge_channels.py diff --git a/scripts/par_psp.py b/workflow/scripts/par_psp.py similarity index 100% rename from scripts/par_psp.py rename to workflow/scripts/par_psp.py diff --git a/scripts/pars_dsp_build_svm.py b/workflow/scripts/pars_dsp_build_svm.py similarity index 100% rename from scripts/pars_dsp_build_svm.py rename to workflow/scripts/pars_dsp_build_svm.py diff --git a/scripts/pars_dsp_dplms.py b/workflow/scripts/pars_dsp_dplms.py similarity index 100% rename from scripts/pars_dsp_dplms.py rename to workflow/scripts/pars_dsp_dplms.py diff --git a/scripts/pars_dsp_eopt.py b/workflow/scripts/pars_dsp_eopt.py similarity index 100% rename from scripts/pars_dsp_eopt.py rename to workflow/scripts/pars_dsp_eopt.py diff --git a/scripts/pars_dsp_event_selection.py b/workflow/scripts/pars_dsp_event_selection.py similarity index 100% rename from scripts/pars_dsp_event_selection.py rename to workflow/scripts/pars_dsp_event_selection.py diff --git a/scripts/pars_dsp_nopt.py b/workflow/scripts/pars_dsp_nopt.py similarity index 100% rename from scripts/pars_dsp_nopt.py rename to workflow/scripts/pars_dsp_nopt.py diff 
--git a/scripts/pars_dsp_svm.py b/workflow/scripts/pars_dsp_svm.py similarity index 100% rename from scripts/pars_dsp_svm.py rename to workflow/scripts/pars_dsp_svm.py diff --git a/scripts/pars_dsp_tau.py b/workflow/scripts/pars_dsp_tau.py similarity index 100% rename from scripts/pars_dsp_tau.py rename to workflow/scripts/pars_dsp_tau.py diff --git a/scripts/pars_hit_aoe.py b/workflow/scripts/pars_hit_aoe.py similarity index 100% rename from scripts/pars_hit_aoe.py rename to workflow/scripts/pars_hit_aoe.py diff --git a/scripts/pars_hit_ecal.py b/workflow/scripts/pars_hit_ecal.py similarity index 100% rename from scripts/pars_hit_ecal.py rename to workflow/scripts/pars_hit_ecal.py diff --git a/scripts/pars_hit_lq.py b/workflow/scripts/pars_hit_lq.py similarity index 100% rename from scripts/pars_hit_lq.py rename to workflow/scripts/pars_hit_lq.py diff --git a/scripts/pars_hit_qc.py b/workflow/scripts/pars_hit_qc.py similarity index 100% rename from scripts/pars_hit_qc.py rename to workflow/scripts/pars_hit_qc.py diff --git a/scripts/pars_pht_aoecal.py b/workflow/scripts/pars_pht_aoecal.py similarity index 100% rename from scripts/pars_pht_aoecal.py rename to workflow/scripts/pars_pht_aoecal.py diff --git a/scripts/pars_pht_fast.py b/workflow/scripts/pars_pht_fast.py similarity index 100% rename from scripts/pars_pht_fast.py rename to workflow/scripts/pars_pht_fast.py diff --git a/scripts/pars_pht_lqcal.py b/workflow/scripts/pars_pht_lqcal.py similarity index 100% rename from scripts/pars_pht_lqcal.py rename to workflow/scripts/pars_pht_lqcal.py diff --git a/scripts/pars_pht_partcal.py b/workflow/scripts/pars_pht_partcal.py similarity index 100% rename from scripts/pars_pht_partcal.py rename to workflow/scripts/pars_pht_partcal.py diff --git a/scripts/pars_pht_qc.py b/workflow/scripts/pars_pht_qc.py similarity index 100% rename from scripts/pars_pht_qc.py rename to workflow/scripts/pars_pht_qc.py diff --git a/scripts/pars_pht_qc_phy.py b/workflow/scripts/pars_pht_qc_phy.py similarity index 100% rename from scripts/pars_pht_qc_phy.py rename to workflow/scripts/pars_pht_qc_phy.py diff --git a/scripts/pars_tcm_pulser.py b/workflow/scripts/pars_tcm_pulser.py similarity index 100% rename from scripts/pars_tcm_pulser.py rename to workflow/scripts/pars_tcm_pulser.py diff --git a/scripts/write_filelist.py b/workflow/scripts/write_filelist.py similarity index 100% rename from scripts/write_filelist.py rename to workflow/scripts/write_filelist.py From 054041d5d6af4e80431e61ab6c2cde639f26a0fd Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 28 Jan 2025 14:34:12 +0100 Subject: [PATCH 052/101] move dsp pars rules to dedicated file, scope rules with _geds --- Snakefile | 2 + rules/dsp.smk | 251 +---------------- rules/dsp_pars_geds.smk | 252 ++++++++++++++++++ rules/psp.smk | 203 +------------- rules/psp_pars_geds.smk | 209 +++++++++++++++ scripts/{par_psp.py => par_psp_geds.py} | 0 ...uild_svm.py => pars_dsp_build_svm_geds.py} | 0 ...rs_dsp_dplms.py => pars_dsp_dplms_geds.py} | 0 ...pars_dsp_eopt.py => pars_dsp_eopt_geds.py} | 0 ...t_selection.py => pars_dsp_evtsel_geds.py} | 0 ...pars_dsp_nopt.py => pars_dsp_nopt_geds.py} | 0 .../{pars_dsp_svm.py => pars_dsp_svm_geds.py} | 0 .../{pars_dsp_tau.py => pars_dsp_tau_geds.py} | 0 13 files changed, 465 insertions(+), 452 deletions(-) create mode 100644 rules/dsp_pars_geds.smk create mode 100644 rules/psp_pars_geds.smk rename scripts/{par_psp.py => par_psp_geds.py} (100%) rename scripts/{pars_dsp_build_svm.py => pars_dsp_build_svm_geds.py} (100%) 
rename scripts/{pars_dsp_dplms.py => pars_dsp_dplms_geds.py} (100%) rename scripts/{pars_dsp_eopt.py => pars_dsp_eopt_geds.py} (100%) rename scripts/{pars_dsp_event_selection.py => pars_dsp_evtsel_geds.py} (100%) rename scripts/{pars_dsp_nopt.py => pars_dsp_nopt_geds.py} (100%) rename scripts/{pars_dsp_svm.py => pars_dsp_svm_geds.py} (100%) rename scripts/{pars_dsp_tau.py => pars_dsp_tau_geds.py} (100%) diff --git a/Snakefile b/Snakefile index 10a6855..eff8f05 100644 --- a/Snakefile +++ b/Snakefile @@ -62,7 +62,9 @@ include: "rules/chanlist_gen.smk" include: "rules/common.smk" include: "rules/main.smk" include: "rules/tcm.smk" +include: "rules/dsp_pars_geds.smk" include: "rules/dsp.smk" +include: "rules/psp_pars_geds.smk" include: "rules/psp.smk" include: "rules/hit.smk" include: "rules/pht.smk" diff --git a/rules/dsp.smk b/rules/dsp.smk index 8000fa2..f4f8487 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -1,7 +1,5 @@ """ -Snakemake rules for processing dsp tier. This is done in 4 steps: -- extraction of pole zero constant(s) for each channel from cal data -- extraction of energy filter parameters and charge trapping correction for each channel from cal data +Snakemake rules for processing dsp tier. - combining of all channels into single pars files with associated plot and results files - running dsp over all channels using par file """ @@ -11,9 +9,6 @@ from scripts.library.create_pars_keylist import ParsKeyResolve from pathlib import Path from scripts.library.create_pars_keylist import ParsKeyResolve from scripts.library.patterns import ( - get_pattern_pars_tmp_channel, - get_pattern_plts_tmp_channel, - get_pattern_log_channel, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, @@ -34,250 +29,6 @@ Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) -rule build_pars_dsp_tau: - input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - decay_const=temp(get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant")), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant")), - log: - get_pattern_log_channel(setup, "par_dsp_decay_constant"), - group: - "par-dsp" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_tau.py " - "--configs {configs} " - "--log {log} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--plot_path {output.plots} " - "--output_file {output.decay_const} " - "--pulser_file {input.pulser} " - "--raw_files {input.files} " - - -rule build_pars_event_selection: - input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), - pulser_file=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), - raw_cal=get_blinding_curve_file, - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - peak_file=temp(get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5")), - log: - get_pattern_log_channel(setup, "par_dsp_event_selection"), - group: - "par-dsp" - resources: - runtime=300, - mem_swap=70, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_event_selection.py " - "--configs {configs} " - 
"--log {log} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--peak_file {output.peak_file} " - "--pulser_file {input.pulser_file} " - "--decay_const {input.database} " - "--raw_cal {input.raw_cal} " - "--raw_filelist {input.files}" - - -# This rule builds the optimal energy filter parameters for the dsp using fft files -rule build_pars_dsp_nopt: - input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" - ), - database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), - inplots=get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant"), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - dsp_pars_nopt=temp( - get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization") - ), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization")), - log: - get_pattern_log_channel(setup, "par_dsp_noise_optimization"), - group: - "par-dsp" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_nopt.py " - "--database {input.database} " - "--configs {configs} " - "--log {log} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--inplots {input.inplots} " - "--plot_path {output.plots} " - "--dsp_pars {output.dsp_pars_nopt} " - "--raw_filelist {input.files}" - - -# This rule builds the dplms energy filter for the dsp using fft and cal files -rule build_pars_dsp_dplms: - input: - fft_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" - ), - peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), - database=get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization"), - inplots=get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization"), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp", "dplms")), - lh5_path=temp( - get_pattern_pars_tmp_channel(setup, "dsp", "dplms", extension="lh5") - ), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "dplms")), - log: - get_pattern_log_channel(setup, "pars_dsp_dplms"), - group: - "par-dsp" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_dplms.py " - "--fft_raw_filelist {input.fft_files} " - "--peak_file {input.peak_file} " - "--database {input.database} " - "--inplots {input.inplots} " - "--configs {configs} " - "--log {log} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--dsp_pars {output.dsp_pars} " - "--lh5_path {output.lh5_path} " - "--plot_path {output.plots} " - - -# This rule builds the optimal energy filter parameters for the dsp using calibration dsp files -rule build_pars_dsp_eopt: - input: - peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), - decay_const=get_pattern_pars_tmp_channel(setup, "dsp", "dplms"), - inplots=get_pattern_plts_tmp_channel(setup, "dsp", "dplms"), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp_eopt")), - qbb_grid=temp( - get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl") - ), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp")), - log: - get_pattern_log_channel(setup, "pars_dsp_eopt"), - 
group: - "par-dsp" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_eopt.py " - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--peak_file {input.peak_file} " - "--inplots {input.inplots} " - "--decay_const {input.decay_const} " - "--plot_path {output.plots} " - "--qbb_grid_path {output.qbb_grid} " - "--final_dsp_pars {output.dsp_pars}" - - -rule build_svm_dsp: - input: - hyperpars=lambda wildcards: get_input_par_file( - wildcards, "dsp", "svm_hyperpars" - ), - train_data=lambda wildcards: str( - get_input_par_file(wildcards, "dsp", "svm_hyperpars") - ).replace("hyperpars.yaml", "train.lh5"), - params: - timestamp="{timestamp}", - datatype="cal", - output: - dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), - log: - str(get_pattern_log(setup, "pars_dsp_svm")).replace("{datatype}", "cal"), - group: - "par-dsp-svm" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_build_svm.py " - "--log {log} " - "--configs {configs} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--train_data {input.train_data} " - "--train_hyperpars {input.hyperpars} " - "--output_file {output.dsp_pars}" - - -rule build_pars_dsp_svm: - input: - dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp_eopt"), - svm_file=get_pattern_pars(setup, "dsp", "svm", "pkl"), - output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), - log: - get_pattern_log_channel(setup, "pars_dsp_svm"), - group: - "par-dsp" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_svm.py " - "--log {log} " - "--input_file {input.dsp_pars} " - "--output_file {output.dsp_pars} " - "--svm_file {input.svm_file}" - - rule build_plts_dsp: input: lambda wildcards: get_plt_chanlist( diff --git a/rules/dsp_pars_geds.smk b/rules/dsp_pars_geds.smk new file mode 100644 index 0000000..7f16c9e --- /dev/null +++ b/rules/dsp_pars_geds.smk @@ -0,0 +1,252 @@ +""" +Snakemake rules for building dsp pars for HPGes, before running build_dsp() +- extraction of pole zero constant(s) for each channel from cal data +- extraction of energy filter parameters and charge trapping correction for each channel from cal data +""" + +from scripts.util.create_pars_keylist import pars_key_resolve +from scripts.util.patterns import ( + get_pattern_pars_tmp_channel, + get_pattern_plts_tmp_channel, + get_pattern_log_channel, + get_pattern_tier_raw, + get_pattern_log, + get_pattern_pars, +) + +dsp_par_catalog = pars_key_resolve.get_par_catalog( + ["-*-*-*-cal"], + get_pattern_tier_raw(setup), + {"cal": ["par_dsp"], "lar": ["par_dsp"]}, +) + + +rule build_pars_dsp_tau_geds: + input: + files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" + ), + pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + decay_const=temp(get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant")), + plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant")), + log: + get_pattern_log_channel(setup, "par_dsp_decay_constant"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_tau_geds.py " + "--configs {configs} " + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel 
{params.channel} " + "--plot_path {output.plots} " + "--output_file {output.decay_const} " + "--pulser_file {input.pulser} " + "--raw_files {input.files}" + + +rule build_pars_evtsel_geds: + input: + files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" + ), + pulser_file=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), + raw_cal=get_blinding_curve_file, + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + peak_file=temp(get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5")), + log: + get_pattern_log_channel(setup, "par_dsp_event_selection"), + group: + "par-dsp" + resources: + runtime=300, + mem_swap=70, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_evtsel_geds.py " + "--configs {configs} " + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--peak_file {output.peak_file} " + "--pulser_file {input.pulser_file} " + "--decay_const {input.database} " + "--raw_cal {input.raw_cal} " + "--raw_filelist {input.files}" + + +# This rule builds the optimal energy filter parameters for the dsp using fft files +rule build_pars_dsp_nopt_geds: + input: + files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" + ), + database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), + inplots=get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + dsp_pars_nopt=temp( + get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization") + ), + plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization")), + log: + get_pattern_log_channel(setup, "par_dsp_noise_optimization"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_nopt_geds.py " + "--database {input.database} " + "--configs {configs} " + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--inplots {input.inplots} " + "--plot_path {output.plots} " + "--dsp_pars {output.dsp_pars_nopt} " + "--raw_filelist {input.files}" + + +# This rule builds the dplms energy filter for the dsp using fft and cal files +rule build_pars_dsp_dplms_geds: + input: + fft_files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" + ), + peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), + database=get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization"), + inplots=get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp", "dplms")), + lh5_path=temp( + get_pattern_pars_tmp_channel(setup, "dsp", "dplms", extension="lh5") + ), + plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "dplms")), + log: + get_pattern_log_channel(setup, "pars_dsp_dplms"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_dplms_geds.py " + "--fft_raw_filelist {input.fft_files} " + "--peak_file {input.peak_file} " + "--database {input.database} " + "--inplots {input.inplots} " + "--configs {configs} " + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + 
"--channel {params.channel} " + "--dsp_pars {output.dsp_pars} " + "--lh5_path {output.lh5_path} " + "--plot_path {output.plots} " + + +# This rule builds the optimal energy filter parameters for the dsp using calibration dsp files +rule build_pars_dsp_eopt_geds: + input: + peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), + decay_const=get_pattern_pars_tmp_channel(setup, "dsp", "dplms"), + inplots=get_pattern_plts_tmp_channel(setup, "dsp", "dplms"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp_eopt")), + qbb_grid=temp( + get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl") + ), + plots=temp(get_pattern_plts_tmp_channel(setup, "dsp")), + log: + get_pattern_log_channel(setup, "pars_dsp_eopt"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_eopt_geds.py " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--peak_file {input.peak_file} " + "--inplots {input.inplots} " + "--decay_const {input.decay_const} " + "--plot_path {output.plots} " + "--qbb_grid_path {output.qbb_grid} " + "--final_dsp_pars {output.dsp_pars}" + + +rule build_svm_dsp_geds: + input: + hyperpars=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars"), + train_data=lambda wildcards: get_svm_file( + wildcards, "dsp", "svm_hyperpars" + ).replace("hyperpars.json", "train.lh5"), + output: + dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), + log: + get_pattern_log(setup, "pars_dsp_svm").replace("{datatype}", "cal"), + group: + "par-dsp-svm" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_build_svm_geds.py " + "--log {log} " + "--train_data {input.train_data} " + "--train_hyperpars {input.hyperpars} " + "--output_file {output.dsp_pars}" + + +rule build_pars_dsp_svm_geds: + input: + dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp_eopt"), + svm_file=get_pattern_pars(setup, "dsp", "svm", "pkl"), + output: + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), + log: + get_pattern_log_channel(setup, "pars_dsp_svm"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_svm_geds.py " + "--log {log} " + "--input_file {input.dsp_pars} " + "--output_file {output.dsp_pars} " + "--svm_file {input.svm_file}" diff --git a/rules/psp.smk b/rules/psp.smk index eed63ae..dc0cfe5 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -1,7 +1,5 @@ """ -Snakemake rules for processing pht (partition hit) tier data. This is done in 4 steps: -- extraction of calibration curves(s) for each run for each channel from cal data -- extraction of psd calibration parameters and partition level energy fitting for each channel over whole partition from cal data +Snakemake rules for processing psp (partition dsp) tier data. - combining of all channels into single pars files with associated plot and results files - running build hit over all channels using par file """ @@ -9,11 +7,7 @@ Snakemake rules for processing pht (partition hit) tier data. 
This is done in 4 from scripts.library.pars_loading import ParsCatalog from scripts.library.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.library.utils import set_last_rule_name from scripts.library.patterns import ( - get_pattern_pars_tmp_channel, - get_pattern_plts_tmp_channel, - get_pattern_log_channel, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, @@ -33,201 +27,6 @@ if psp_par_cat_file.is_file(): Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) -psp_rules = {} -for key, dataset in part.datasets.items(): - for partition in dataset.keys(): - - rule: - input: - dsp_pars=part.get_par_files( - dsp_par_catalog, - partition, - key, - tier="dsp", - name="eopt", - ), - dsp_objs=part.get_par_files( - dsp_par_catalog, - partition, - key, - tier="dsp", - name="objects", - extension="pkl", - ), - dsp_plots=part.get_plt_files( - dsp_par_catalog, partition, key, tier="dsp" - ), - wildcard_constraints: - channel=part.get_wildcard_constraints(partition, key), - params: - datatype="cal", - channel="{channel}" if key == "default" else key, - timestamp=part.get_timestamp( - psp_par_catalog, partition, key, tier="psp" - ), - output: - psp_pars=temp( - part.get_par_files( - psp_par_catalog, - partition, - key, - tier="psp", - name="eopt", - ) - ), - psp_objs=temp( - part.get_par_files( - psp_par_catalog, - partition, - key, - tier="psp", - name="objects", - extension="pkl", - ) - ), - psp_plots=temp( - part.get_plt_files( - psp_par_catalog, - partition, - key, - tier="psp", - ) - ), - log: - part.get_log_file( - psp_par_catalog, - partition, - key, - "psp", - name="par_psp", - ), - group: - "par-psp" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/par_psp.py " - "--log {log} " - "--configs {configs} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--in_plots {input.dsp_plots} " - "--out_plots {output.psp_plots} " - "--in_obj {input.dsp_objs} " - "--out_obj {output.psp_objs} " - "--input {input.dsp_pars} " - "--output {output.psp_pars} " - - set_last_rule_name(workflow, f"{key}-{partition}-build_par_psp") - - if key in psp_rules: - psp_rules[key].append(list(workflow.rules)[-1]) - else: - psp_rules[key] = [list(workflow.rules)[-1]] - - -# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs -# This rule builds the a/e calibration using the calibration dsp files for the whole partition -rule build_par_psp: - input: - dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp", "eopt"), - dsp_objs=get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl"), - dsp_plots=get_pattern_plts_tmp_channel(setup, "dsp"), - params: - datatype="cal", - channel="{channel}", - timestamp="{timestamp}", - output: - psp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp", "eopt")), - psp_objs=temp( - get_pattern_pars_tmp_channel(setup, "psp", "objects", extension="pkl") - ), - psp_plots=temp(get_pattern_plts_tmp_channel(setup, "psp")), - log: - get_pattern_log_channel(setup, "pars_psp"), - group: - "par-psp" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/par_psp.py " - "--log {log} " - "--configs {configs} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--in_plots {input.dsp_plots} " - "--out_plots {output.psp_plots} " - "--in_obj {input.dsp_objs} " - "--out_obj 
{output.psp_objs} " - "--input {input.dsp_pars} " - "--output {output.psp_pars} " - - -fallback_psp_rule = list(workflow.rules)[-1] -rule_order_list = [] -ordered = OrderedDict(psp_rules) -ordered.move_to_end("default") -for key, items in ordered.items(): - rule_order_list += [item.name for item in items] -rule_order_list.append(fallback_psp_rule.name) -workflow._ruleorder.add(*rule_order_list) # [::-1] - - -rule build_svm_psp: - input: - hyperpars=lambda wildcards: get_input_par_file( - wildcards, "psp", "svm_hyperpars" - ), - train_data=lambda wildcards: str( - get_input_par_file(wildcards, "psp", "svm_hyperpars") - ).replace("hyperpars.yaml", "train.lh5"), - params: - timestamp="{timestamp}", - datatype="cal", - output: - dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), - log: - get_pattern_log(setup, "pars_psp_svm").as_posix().replace("{datatype}", "cal"), - group: - "par-dsp-svm" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_build_svm.py " - "--log {log} " - "--configs {configs} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--train_data {input.train_data} " - "--train_hyperpars {input.hyperpars} " - "--output_file {output.dsp_pars}" - - -rule build_pars_psp_svm: - input: - dsp_pars=get_pattern_pars_tmp_channel(setup, "psp_eopt"), - svm_model=get_pattern_pars(setup, "psp", "svm", "pkl"), - output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")), - log: - get_pattern_log_channel(setup, "pars_dsp_svm"), - group: - "par-dsp" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_svm.py " - "--log {log} " - "--input_file {input.dsp_pars} " - "--output_file {output.dsp_pars} " - "--svm_file {input.svm_model}" - rule build_pars_psp_objects: input: diff --git a/rules/psp_pars_geds.smk b/rules/psp_pars_geds.smk new file mode 100644 index 0000000..8d3d2c8 --- /dev/null +++ b/rules/psp_pars_geds.smk @@ -0,0 +1,209 @@ +""" +Snakemake rules for processing psp (partition dsp) tier data. 
+- extraction of calibration curves(s) for each run for each channel from cal data +- extraction of psd calibration parameters and partition level energy fitting for each channel over whole partition from cal data +""" + +from scripts.util.pars_loading import pars_catalog +from scripts.util.create_pars_keylist import pars_key_resolve +from scripts.util.utils import par_psp_path, par_dsp_path, set_last_rule_name +from scripts.util.patterns import ( + get_pattern_pars_tmp_channel, + get_pattern_plts_tmp_channel, + get_pattern_log_channel, + get_pattern_log, + get_pattern_pars, +) + +psp_par_catalog = pars_key_resolve.get_par_catalog( + ["-*-*-*-cal"], + get_pattern_tier_raw(setup), + {"cal": ["par_psp"], "lar": ["par_psp"]}, +) + +psp_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + dsp_pars=part.get_par_files( + dsp_par_catalog, + partition, + key, + tier="dsp", + name="eopt", + ), + dsp_objs=part.get_par_files( + dsp_par_catalog, + partition, + key, + tier="dsp", + name="objects", + extension="pkl", + ), + dsp_plots=part.get_plt_files( + dsp_par_catalog, partition, key, tier="dsp" + ), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + psp_par_catalog, partition, key, tier="psp" + ), + output: + psp_pars=temp( + part.get_par_files( + psp_par_catalog, + partition, + key, + tier="psp", + name="eopt", + ) + ), + psp_objs=temp( + part.get_par_files( + psp_par_catalog, + partition, + key, + tier="psp", + name="objects", + extension="pkl", + ) + ), + psp_plots=temp( + part.get_plt_files( + psp_par_catalog, + partition, + key, + tier="psp", + ) + ), + log: + part.get_log_file( + psp_par_catalog, + partition, + key, + "psp", + name="par_psp", + ), + group: + "par-psp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/par_psp_geds.py " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--in_plots {input.dsp_plots} " + "--out_plots {output.psp_plots} " + "--in_obj {input.dsp_objs} " + "--out_obj {output.psp_objs} " + "--input {input.dsp_pars} " + "--output {output.psp_pars} " + + set_last_rule_name(workflow, f"{key}-{partition}-build_par_psp") + + if key in psp_rules: + psp_rules[key].append(list(workflow.rules)[-1]) + else: + psp_rules[key] = [list(workflow.rules)[-1]] + + +# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs +# This rule builds the a/e calibration using the calibration dsp files for the whole partition +rule build_par_psp: + input: + dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp", "eopt"), + dsp_objs=get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl"), + dsp_plots=get_pattern_plts_tmp_channel(setup, "dsp"), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", + output: + psp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp", "eopt")), + psp_objs=temp( + get_pattern_pars_tmp_channel(setup, "psp", "objects", extension="pkl") + ), + psp_plots=temp(get_pattern_plts_tmp_channel(setup, "psp")), + log: + get_pattern_log_channel(setup, "pars_psp"), + group: + "par-psp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/par_psp.py " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp 
{params.timestamp} " + "--channel {params.channel} " + "--in_plots {input.dsp_plots} " + "--out_plots {output.psp_plots} " + "--in_obj {input.dsp_objs} " + "--out_obj {output.psp_objs} " + "--input {input.dsp_pars} " + "--output {output.psp_pars} " + + +fallback_psp_rule = list(workflow.rules)[-1] +rule_order_list = [] +ordered = OrderedDict(psp_rules) +ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_psp_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] + + +rule build_svm_psp: + input: + hyperpars=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars"), + train_data=lambda wildcards: get_svm_file( + wildcards, "psp", "svm_hyperpars" + ).replace("hyperpars.json", "train.lh5"), + output: + dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), + log: + get_pattern_log(setup, "pars_psp_svm").replace("{datatype}", "cal"), + group: + "par-dsp-svm" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_build_svm_geds.py " + "--log {log} " + "--train_data {input.train_data} " + "--train_hyperpars {input.hyperpars} " + "--output_file {output.dsp_pars}" + + +rule build_pars_psp_svm: + input: + dsp_pars=get_pattern_pars_tmp_channel(setup, "psp_eopt"), + svm_model=get_pattern_pars(setup, "psp", "svm", "pkl"), + output: + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")), + log: + get_pattern_log_channel(setup, "pars_dsp_svm"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_svm_geds.py " + "--log {log} " + "--input_file {input.dsp_pars} " + "--output_file {output.dsp_pars} " + "--svm_file {input.svm_model}" diff --git a/scripts/par_psp.py b/scripts/par_psp_geds.py similarity index 100% rename from scripts/par_psp.py rename to scripts/par_psp_geds.py diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm_geds.py similarity index 100% rename from scripts/pars_dsp_build_svm.py rename to scripts/pars_dsp_build_svm_geds.py diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms_geds.py similarity index 100% rename from scripts/pars_dsp_dplms.py rename to scripts/pars_dsp_dplms_geds.py diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt_geds.py similarity index 100% rename from scripts/pars_dsp_eopt.py rename to scripts/pars_dsp_eopt_geds.py diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_evtsel_geds.py similarity index 100% rename from scripts/pars_dsp_event_selection.py rename to scripts/pars_dsp_evtsel_geds.py diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt_geds.py similarity index 100% rename from scripts/pars_dsp_nopt.py rename to scripts/pars_dsp_nopt_geds.py diff --git a/scripts/pars_dsp_svm.py b/scripts/pars_dsp_svm_geds.py similarity index 100% rename from scripts/pars_dsp_svm.py rename to scripts/pars_dsp_svm_geds.py diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau_geds.py similarity index 100% rename from scripts/pars_dsp_tau.py rename to scripts/pars_dsp_tau_geds.py From c01cf27687bca8dc252c1c81efb7cc293c603f56 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 28 Jan 2025 18:41:55 +0100 Subject: [PATCH 053/101] update with prodenv stuff and uv --- .gitignore | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.gitignore b/.gitignore index 4eb2181..a904f40 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,13 @@ *~ *.csv +# uv +uv.lock + +#prodenv_stuff +inputs 
+software +generated # -------------------- github-generated stuff ------------------- # Byte-compiled / optimized / DLL files From 0c4b270de18d1c478134e54574ffee4737596de5 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 28 Jan 2025 18:42:48 +0100 Subject: [PATCH 054/101] add execenv scripts --- pyproject.toml | 37 +++++-- workflow/scripts/library/__init__.py | 4 - workflow/scripts/library/execenv.py | 148 +++++++++++++++++++++++++++ 3 files changed, 177 insertions(+), 12 deletions(-) create mode 100644 workflow/scripts/library/execenv.py diff --git a/pyproject.toml b/pyproject.toml index d96ee37..62ebab3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,14 +1,31 @@ -[tool.uv] -package = false - [tool.uv.workspace] -exclude = ["rules", "templates", "scripts", "generated", "inputs", "software", "workflow"] +exclude = ["generated", "inputs", "software", "workflow"] + +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" [tool.setuptools] +include-package-data = true +zip-safe = false +license-files = [ + "LICENSE", +] py-modules = [] +[tool.setuptools.package-dir] +"" = "workflow" + +[tool.setuptools.packages.find] +where = [ + "workflow", +] + +[tool.setuptools_scm] +write_to = "workflow/_version.py" + [project] -name = "legend-dataflow" +name = "legend_dataflow" description = "Python package for processing L200 data" authors = [ {name = "George Marshall", email = "george.marshall.20@ucl.ac.uk"}, @@ -33,6 +50,7 @@ requires-python = ">=3.11" dependencies = [ "dbetto>=1.0.5", "snakemake>=8.16", + #"pylegendmeta>=1.0.0", wait for new release ] dynamic = [ "version", @@ -47,17 +65,17 @@ no_container = [ "legend-daq2lh5", ] test = [ - "legend-dataflow[no_container]", + "legend_dataflow[no_container]", "pytest >=6", "pytest-cov >=3", ] dev = [ - "legend-dataflow[no_container]", + "legend_dataflow[no_container]", "pytest >=6", "pytest-cov >=3", ] docs = [ - "legend-dataflow[no_container]", + "legend_dataflow[no_container]", "sphinx>=7.0", "myst_parser>=0.13", "sphinx_inline_tabs", @@ -65,3 +83,6 @@ docs = [ "sphinx_autodoc_typehints", "furo>=2023.08.17", ] + +[project.scripts] +dataprod = "scripts.library.execenv:dataprod" diff --git a/workflow/scripts/library/__init__.py b/workflow/scripts/library/__init__.py index 5aee6d5..f812d11 100644 --- a/workflow/scripts/library/__init__.py +++ b/workflow/scripts/library/__init__.py @@ -1,5 +1,4 @@ from .cal_grouping import CalGrouping -from .catalog import Catalog, Props, PropsStream from .create_pars_keylist import ParsKeyResolve from .FileKey import ChannelProcKey, FileKey, ProcessingFileKey from .pars_loading import ParsCatalog @@ -13,14 +12,11 @@ __all__ = [ "CalGrouping", - "Catalog", "ChannelProcKey", "FileKey", "ParsCatalog", "ParsKeyResolve", "ProcessingFileKey", - "Props", - "PropsStream", "runcmd", "subst_vars", "subst_vars", diff --git a/workflow/scripts/library/execenv.py b/workflow/scripts/library/execenv.py new file mode 100644 index 0000000..c4e249d --- /dev/null +++ b/workflow/scripts/library/execenv.py @@ -0,0 +1,148 @@ +# ruff: noqa: T201 +from __future__ import annotations + +import argparse +import os +import shutil +import string +import subprocess +from pathlib import Path + +import yaml + + +def dataprod() -> None: + """dataprod's command-line interface for installing and loading the software in the data production environment. + + .. 
code-block:: console + + $ dataprod --help + $ dataprod load --help # help section for a specific sub-command + """ + + parser = argparse.ArgumentParser( + prog="dataprod", description="dataprod's command-line interface" + ) + + subparsers = parser.add_subparsers() + parser_install = subparsers.add_parser( + "install", help="install user software in data production environment" + ) + parser_install.add_argument( + "config_file", help="production cycle configuration file", type=str + ) + parser_install.add_argument( + "-r", help="remove software directory before installing software", action="store_true" + ) + parser_install.set_defaults(func=install) + + parser_load = subparsers.add_parser( + "load", help="load data production environment and execute a given command" + ) + parser_load.add_argument("config_file", help="production cycle configuration file", type=str) + parser_load.add_argument( + "command", help="command to run within the container", type=str, nargs="+" + ) + parser_load.set_defaults(func=load) + + args = parser.parse_args() + args.func(args) + + +def install(args) -> None: + """ + This function installs user software in the data production environment. + The software packages should be specified in the config.yaml file with the format: + + ```yaml + setups: + l200: + pkg_versions: + package_name: package_version + ``` + """ + print(args.config_file) + if not Path(args.config_file).is_file(): + msg = "config file is not a regular file" + raise RuntimeError(msg) + + config_file_dir = Path(args.config_file).resolve().parent + with Path(args.config_file).open() as r: + config_dic = yaml.safe_load(r) + + exec_cmd = config_dic["setups"]["l200"]["execenv"]["cmd"] + exec_arg = config_dic["setups"]["l200"]["execenv"]["arg"] + path_src = config_dic["setups"]["l200"]["paths"]["src"] + path_install = config_dic["setups"]["l200"]["paths"]["install"] + path_cache = config_dic["setups"]["l200"]["paths"]["cache"] + + exec_cmd = string.Template(exec_cmd).substitute({"_": config_file_dir}) + exec_arg = string.Template(exec_arg).substitute({"_": config_file_dir}) + path_src = Path(string.Template(path_src).substitute({"_": config_file_dir})) + path_install = Path(string.Template(path_install).substitute({"_": config_file_dir})) + path_cache = Path(string.Template(path_cache).substitute({"_": config_file_dir})) + + if args.r: + shutil.rmtree(path_install) + shutil.rmtree(path_cache) + + pkg_list = "" + for pkg, pkg_version in config_dic["setups"]["l200"]["pkg_versions"].items(): + if (path_src / pkg).exists(): + pkg_list += f" '{path_src / pkg}'" + else: + pkg_list += f" '{pkg_version}'" + + cmd_expr = ( + f"PYTHONUSERBASE={path_install} PIP_CACHE_DIR={path_cache} " + f"{exec_cmd} {exec_arg} python3 -B -m pip install --no-warn-script-location {pkg_list}" + ) + print("INFO: running:", cmd_expr) + os.system(cmd_expr) + + +def load(args) -> None: + """ + This function loads the data production environment and executes a given command. 
+ """ + + if not Path(args.config_file).is_file(): + print("Error: config file does not exist") + exit() + + config_file_dir = Path(args.config_file).resolve().parent + with Path(args.config_file).open() as r: + config_dic = yaml.safe_load(r) + + exec_cmd = config_dic["setups"]["l200"]["execenv"]["cmd"] + exec_arg = config_dic["setups"]["l200"]["execenv"]["arg"] + env_vars = config_dic["setups"]["l200"]["execenv"]["env"] + path_install = config_dic["setups"]["l200"]["paths"]["install"] + + exec_cmd = string.Template(exec_cmd).substitute({"_": config_file_dir}) + exec_arg = string.Template(exec_arg).substitute({"_": config_file_dir}) + path_install = string.Template(path_install).substitute({"_": config_file_dir}) + + xdg_runtime_dir = os.getenv("XDG_RUNTIME_DIR") + if xdg_runtime_dir: + subprocess.run( + [*(exec_cmd.split()), exec_arg, *args.command], + env=dict( + PYTHONUSERBASE=path_install, + APPTAINERENV_APPEND_PATH=f":{path_install}/bin", + APPTAINER_BINDPATH=xdg_runtime_dir, + **env_vars, + ), + check=True, + ) + else: + subprocess.run( + [*(exec_cmd.split()), exec_arg, *args.command], + env=dict( + PYTHONUSERBASE=path_install, + APPTAINERENV_APPEND_PATH=f":{path_install}/bin", + APPTAINER_BINDPATH=xdg_runtime_dir, + **env_vars, + ), + check=True, + ) From 88cafa5de757e7af458fe48f6adc410395b77df5 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 28 Jan 2025 18:44:17 +0100 Subject: [PATCH 055/101] add code for metadata checkout subject to release --- workflow/Snakefile | 8 +++++++- workflow/Snakefile-build-raw | 7 ++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index dbc02b5..ba839be 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -46,9 +46,15 @@ chan_maps = chan_map_path(setup) meta = metadata_path(setup) det_status = det_status_path(setup) swenv = runcmd(setup) -part = lib.CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") basedir = workflow.basedir +# wait for new pylegendmeta release +# if not Path(meta).exists(): +# meta = LegendMetadata() +# meta.checkout(config["setups"]["l200"]["meta_version"]) + +part = lib.CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") + wildcard_constraints: experiment=r"\w+", diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw index 5dddfa6..0dec789 100644 --- a/workflow/Snakefile-build-raw +++ b/workflow/Snakefile-build-raw @@ -34,9 +34,14 @@ chan_maps = chan_map_path(setup) swenv = runcmd(setup) meta = metadata_path(setup) det_status = det_status_path(setup) - basedir = workflow.basedir +# wait for new pylegendmeta release +# if not Path(meta).exists(): +# meta = LegendMetadata() +# meta.checkout(config["setups"]["l200"]["meta_version"]) +s + wildcard_constraints: experiment=r"\w+", From 93ad1b3668d48ee11e65e69b3bbab2c5de6739dd Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 29 Jan 2025 11:22:23 +0100 Subject: [PATCH 056/101] update pre-commit config --- .pre-commit-config.yaml | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9217a46..96cec14 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -36,10 +36,18 @@ repos: - id: ruff args: ["--fix", "--show-fixes"] -- repo: https://github.com/asottile/setup-cfg-fmt - rev: "v2.7.0" +- repo: https://github.com/abravalheri/validate-pyproject + rev: "v0.23" hooks: - - id: setup-cfg-fmt + - id: validate-pyproject + additional_dependencies: ["validate-pyproject-schema-store[all]"] + 
+- repo: https://github.com/python-jsonschema/check-jsonschema + rev: "0.30.0" + hooks: + - id: check-dependabot + - id: check-github-workflows + - id: check-readthedocs - repo: https://github.com/pre-commit/mirrors-mypy rev: "v1.14.1" From 7cf0a1b90569acdbbf8704fd15a5a8e80f663785 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 29 Jan 2025 11:22:50 +0100 Subject: [PATCH 057/101] Move library to own package and install that --- pyproject.toml | 31 +++++++++---------- workflow/scripts/build_dsp.py | 2 +- workflow/scripts/build_evt.py | 2 +- workflow/scripts/build_hit.py | 2 +- workflow/scripts/build_raw_blind.py | 2 +- workflow/scripts/build_raw_fcio.py | 2 +- workflow/scripts/build_raw_orca.py | 2 +- workflow/scripts/build_skm.py | 2 +- workflow/scripts/build_tcm.py | 2 +- workflow/scripts/check_blinding.py | 2 +- workflow/scripts/complete_run.py | 6 ++-- workflow/scripts/merge_channels.py | 2 +- workflow/scripts/par_psp_geds.py | 2 +- workflow/scripts/pars_dsp_build_svm_geds.py | 2 +- workflow/scripts/pars_dsp_dplms_geds.py | 2 +- workflow/scripts/pars_dsp_eopt_geds.py | 2 +- workflow/scripts/pars_dsp_evtsel_geds.py | 2 +- workflow/scripts/pars_dsp_nopt_geds.py | 2 +- workflow/scripts/pars_dsp_tau_geds.py | 2 +- workflow/scripts/pars_hit_aoe.py | 4 +-- workflow/scripts/pars_hit_ecal.py | 4 +-- workflow/scripts/pars_hit_lq.py | 4 +-- workflow/scripts/pars_hit_qc.py | 4 +-- workflow/scripts/pars_pht_aoecal.py | 4 +-- workflow/scripts/pars_pht_fast.py | 4 +-- workflow/scripts/pars_pht_lqcal.py | 4 +-- workflow/scripts/pars_pht_partcal.py | 4 +-- workflow/scripts/pars_pht_qc.py | 4 +-- workflow/scripts/pars_pht_qc_phy.py | 4 +-- workflow/scripts/pars_tcm_pulser.py | 2 +- .../library => src/legenddataflow}/FileKey.py | 0 .../legenddataflow}/__init__.py | 0 .../legenddataflow}/cal_grouping.py | 0 .../legenddataflow}/convert_np.py | 0 .../legenddataflow}/create_pars_keylist.py | 0 .../library => src/legenddataflow}/execenv.py | 0 .../library => src/legenddataflow}/log.py | 0 .../legenddataflow}/pars_loading.py | 0 .../legenddataflow}/patterns.py | 0 .../library => src/legenddataflow}/utils.py | 0 40 files changed, 55 insertions(+), 58 deletions(-) rename workflow/{scripts/library => src/legenddataflow}/FileKey.py (100%) rename workflow/{scripts/library => src/legenddataflow}/__init__.py (100%) rename workflow/{scripts/library => src/legenddataflow}/cal_grouping.py (100%) rename workflow/{scripts/library => src/legenddataflow}/convert_np.py (100%) rename workflow/{scripts/library => src/legenddataflow}/create_pars_keylist.py (100%) rename workflow/{scripts/library => src/legenddataflow}/execenv.py (100%) rename workflow/{scripts/library => src/legenddataflow}/log.py (100%) rename workflow/{scripts/library => src/legenddataflow}/pars_loading.py (100%) rename workflow/{scripts/library => src/legenddataflow}/patterns.py (100%) rename workflow/{scripts/library => src/legenddataflow}/utils.py (100%) diff --git a/pyproject.toml b/pyproject.toml index 62ebab3..cf0bc78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,34 +9,31 @@ build-backend = "setuptools.build_meta" include-package-data = true zip-safe = false license-files = [ - "LICENSE", + "LICENSE.md", ] py-modules = [] [tool.setuptools.package-dir] -"" = "workflow" - -[tool.setuptools.packages.find] -where = [ - "workflow", -] +"" = "workflow/src" [tool.setuptools_scm] -write_to = "workflow/_version.py" +write_to = "workflow/src/legenddataflow_version.py" [project] name = "legend_dataflow" -description = "Python package for 
processing L200 data" +description = "Python package for processing LEGEND-200 data" authors = [ {name = "George Marshall", email = "george.marshall.20@ucl.ac.uk"}, {name = "Luigi Pertoldi", email = "gipert@pm.me"}, - {name = "The Legend Collaboration"}, +] +maintainers = [ + {name = "The LEGEND Collaboration"}, ] classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Intended Audience :: Science/Research", - "License :: OSI Approved :: MIT Expat License", + "License :: OSI Approved :: MIT License", "Operating System :: MacOS", "Operating System :: POSIX", "Operating System :: Unix", @@ -47,20 +44,19 @@ classifiers = [ ] readme = "README.md" requires-python = ">=3.11" +dynamic = ["version"] + dependencies = [ "dbetto>=1.0.5", "snakemake>=8.16", - #"pylegendmeta>=1.0.0", wait for new release -] -dynamic = [ - "version", + "pylegendmeta==1.2.0a2", ] [project.optional-dependencies] no_container = [ "pygama", "dspeed", - "pylegendmeta", + "pylegendmeta==1.2.0a2", "legend-pydataobj", "legend-daq2lh5", ] @@ -73,6 +69,7 @@ dev = [ "legend_dataflow[no_container]", "pytest >=6", "pytest-cov >=3", + "pre-commit", ] docs = [ "legend_dataflow[no_container]", @@ -85,4 +82,4 @@ docs = [ ] [project.scripts] -dataprod = "scripts.library.execenv:dataprod" +dataprod = "legenddataflow.execenv:dataprod" diff --git a/workflow/scripts/build_dsp.py b/workflow/scripts/build_dsp.py index 6f97406..f6e44df 100644 --- a/workflow/scripts/build_dsp.py +++ b/workflow/scripts/build_dsp.py @@ -8,9 +8,9 @@ from dbetto import TextDB from dbetto.catalog import Props from dspeed import build_dsp +from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo import lh5 -from library.log import build_log def replace_list_with_array(dic): diff --git a/workflow/scripts/build_evt.py b/workflow/scripts/build_evt.py index 6ef1e0f..5eac164 100644 --- a/workflow/scripts/build_evt.py +++ b/workflow/scripts/build_evt.py @@ -6,9 +6,9 @@ import lgdo.lh5 as lh5 import numpy as np from dbetto import Props, TextDB +from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo.types import Array -from library.log import build_log from pygama.evt import build_evt sto = lh5.LH5Store() diff --git a/workflow/scripts/build_hit.py b/workflow/scripts/build_hit.py index 6310521..f096e0c 100644 --- a/workflow/scripts/build_hit.py +++ b/workflow/scripts/build_hit.py @@ -3,9 +3,9 @@ from pathlib import Path from dbetto.catalog import Props +from legenddataflow.log import build_log from legendmeta import LegendMetadata, TextDB from lgdo import lh5 -from library.log import build_log from pygama.hit.build_hit import build_hit argparser = argparse.ArgumentParser() diff --git a/workflow/scripts/build_raw_blind.py b/workflow/scripts/build_raw_blind.py index e343bde..5d582d4 100644 --- a/workflow/scripts/build_raw_blind.py +++ b/workflow/scripts/build_raw_blind.py @@ -16,9 +16,9 @@ import numexpr as ne import numpy as np from dbetto.catalog import Props +from legenddataflow.log import build_log from legendmeta import LegendMetadata, TextDB from lgdo import lh5 -from library.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) diff --git a/workflow/scripts/build_raw_fcio.py b/workflow/scripts/build_raw_fcio.py index ddc765c..c3b577e 100644 --- a/workflow/scripts/build_raw_fcio.py +++ b/workflow/scripts/build_raw_fcio.py @@ -6,7 +6,7 @@ from daq2lh5 import build_raw from dbetto import TextDB from dbetto.catalog 
import Props -from library.log import build_log +from legenddataflow.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/workflow/scripts/build_raw_orca.py b/workflow/scripts/build_raw_orca.py index 0f5bbcb..c098806 100644 --- a/workflow/scripts/build_raw_orca.py +++ b/workflow/scripts/build_raw_orca.py @@ -6,7 +6,7 @@ from daq2lh5 import build_raw from dbetto import TextDB from dbetto.catalog import Props -from library.log import build_log +from legenddataflow.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/workflow/scripts/build_skm.py b/workflow/scripts/build_skm.py index aefc31b..be2cfb3 100644 --- a/workflow/scripts/build_skm.py +++ b/workflow/scripts/build_skm.py @@ -3,9 +3,9 @@ import awkward as ak from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.log import build_log from lgdo import lh5 from lgdo.types import Array, Struct, Table, VectorOfVectors -from library.log import build_log def get_all_out_fields(input_table, out_fields, current_field=""): diff --git a/workflow/scripts/build_tcm.py b/workflow/scripts/build_tcm.py index 2718c00..402c567 100644 --- a/workflow/scripts/build_tcm.py +++ b/workflow/scripts/build_tcm.py @@ -6,7 +6,7 @@ from daq2lh5.orca import orca_flashcam from dbetto import TextDB from dbetto.catalog import Props -from library.log import build_log +from legenddataflow.log import build_log from pygama.evt.build_tcm import build_tcm argparser = argparse.ArgumentParser() diff --git a/workflow/scripts/check_blinding.py b/workflow/scripts/check_blinding.py index a81a1a3..2a47172 100644 --- a/workflow/scripts/check_blinding.py +++ b/workflow/scripts/check_blinding.py @@ -16,9 +16,9 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo import lh5 -from library.log import build_log from pygama.math.histogram import get_hist from pygama.pargen.energy_cal import get_i_local_maxima diff --git a/workflow/scripts/complete_run.py b/workflow/scripts/complete_run.py index 7ffd73a..4d5cad7 100644 --- a/workflow/scripts/complete_run.py +++ b/workflow/scripts/complete_run.py @@ -7,9 +7,9 @@ import time from pathlib import Path -import library.utils as ut -from library import patterns -from library.FileKey import FileKey +import legenddataflow.utils as ut +from legenddataflow import patterns +from legenddataflow.FileKey import FileKey print("INFO: dataflow ran successfully, now few final checks and scripts") diff --git a/workflow/scripts/merge_channels.py b/workflow/scripts/merge_channels.py index 209708d..1ca2026 100644 --- a/workflow/scripts/merge_channels.py +++ b/workflow/scripts/merge_channels.py @@ -5,9 +5,9 @@ import numpy as np from dbetto.catalog import Props +from legenddataflow.FileKey import ChannelProcKey from legendmeta import LegendMetadata from lgdo import lh5 -from library.FileKey import ChannelProcKey def replace_path(d, old_path, new_path): diff --git a/workflow/scripts/par_psp_geds.py b/workflow/scripts/par_psp_geds.py index d996f3c..c74ffa3 100644 --- a/workflow/scripts/par_psp_geds.py +++ b/workflow/scripts/par_psp_geds.py @@ -8,8 +8,8 @@ import matplotlib.pyplot as plt import numpy as np from dbetto.catalog import Props +from legenddataflow.FileKey import ChannelProcKey from legendmeta import LegendMetadata -from library.FileKey import ChannelProcKey 
mpl.use("Agg") diff --git a/workflow/scripts/pars_dsp_build_svm_geds.py b/workflow/scripts/pars_dsp_build_svm_geds.py index 7a0ecc9..3b7b7ea 100644 --- a/workflow/scripts/pars_dsp_build_svm_geds.py +++ b/workflow/scripts/pars_dsp_build_svm_geds.py @@ -4,8 +4,8 @@ from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.log import build_log from lgdo import lh5 -from library.log import build_log from sklearn.svm import SVC argparser = argparse.ArgumentParser() diff --git a/workflow/scripts/pars_dsp_dplms_geds.py b/workflow/scripts/pars_dsp_dplms_geds.py index 457bda1..5d33fb8 100644 --- a/workflow/scripts/pars_dsp_dplms_geds.py +++ b/workflow/scripts/pars_dsp_dplms_geds.py @@ -8,9 +8,9 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo import Array, Table -from library.log import build_log from pygama.pargen.dplms_ge_dict import dplms_ge_dict argparser = argparse.ArgumentParser() diff --git a/workflow/scripts/pars_dsp_eopt_geds.py b/workflow/scripts/pars_dsp_eopt_geds.py index a957c66..e59ee54 100644 --- a/workflow/scripts/pars_dsp_eopt_geds.py +++ b/workflow/scripts/pars_dsp_eopt_geds.py @@ -11,8 +11,8 @@ from dbetto import TextDB from dbetto.catalog import Props from dspeed.units import unit_registry as ureg +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.log import build_log from pygama.math.distributions import hpge_peak from pygama.pargen.dsp_optimize import ( BayesianOptimizer, diff --git a/workflow/scripts/pars_dsp_evtsel_geds.py b/workflow/scripts/pars_dsp_evtsel_geds.py index 177eba6..dc76878 100644 --- a/workflow/scripts/pars_dsp_evtsel_geds.py +++ b/workflow/scripts/pars_dsp_evtsel_geds.py @@ -12,8 +12,8 @@ import pygama.pargen.energy_cal as pgc from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.log import build_log from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp diff --git a/workflow/scripts/pars_dsp_nopt_geds.py b/workflow/scripts/pars_dsp_nopt_geds.py index 53188ba..ae3aacb 100644 --- a/workflow/scripts/pars_dsp_nopt_geds.py +++ b/workflow/scripts/pars_dsp_nopt_geds.py @@ -8,8 +8,8 @@ import pygama.pargen.noise_optimization as pno from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.log import build_log from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp diff --git a/workflow/scripts/pars_dsp_tau_geds.py b/workflow/scripts/pars_dsp_tau_geds.py index 9a38526..1149c69 100644 --- a/workflow/scripts/pars_dsp_tau_geds.py +++ b/workflow/scripts/pars_dsp_tau_geds.py @@ -6,8 +6,8 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.log import build_log from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.extract_tau import ExtractTau diff --git a/workflow/scripts/pars_hit_aoe.py b/workflow/scripts/pars_hit_aoe.py index 575d3de..d7fa221 100644 --- a/workflow/scripts/pars_hit_aoe.py +++ b/workflow/scripts/pars_hit_aoe.py @@ -10,9 +10,9 @@ 
import pandas as pd from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.convert_np import convert_dict_np_to_float +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.convert_np import convert_dict_np_to_float -from library.log import build_log from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import get_tcm_pulser_ids diff --git a/workflow/scripts/pars_hit_ecal.py b/workflow/scripts/pars_hit_ecal.py index 488463c..8bf4f1f 100644 --- a/workflow/scripts/pars_hit_ecal.py +++ b/workflow/scripts/pars_hit_ecal.py @@ -16,9 +16,9 @@ import pygama.math.histogram as pgh from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.convert_np import convert_dict_np_to_float +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.convert_np import convert_dict_np_to_float -from library.log import build_log from matplotlib.colors import LogNorm from pygama.math.distributions import nb_poly from pygama.pargen.data_cleaning import get_mode_stdev, get_tcm_pulser_ids diff --git a/workflow/scripts/pars_hit_lq.py b/workflow/scripts/pars_hit_lq.py index 4a75a06..c5f04cb 100644 --- a/workflow/scripts/pars_hit_lq.py +++ b/workflow/scripts/pars_hit_lq.py @@ -9,9 +9,9 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.convert_np import convert_dict_np_to_float +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.convert_np import convert_dict_np_to_float -from library.log import build_log from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids diff --git a/workflow/scripts/pars_hit_qc.py b/workflow/scripts/pars_hit_qc.py index 460e858..c9d380f 100644 --- a/workflow/scripts/pars_hit_qc.py +++ b/workflow/scripts/pars_hit_qc.py @@ -11,10 +11,10 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.convert_np import convert_dict_np_to_float +from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo.lh5 import ls -from library.convert_np import convert_dict_np_to_float -from library.log import build_log from pygama.pargen.data_cleaning import ( generate_cut_classifiers, get_keys, diff --git a/workflow/scripts/pars_pht_aoecal.py b/workflow/scripts/pars_pht_aoecal.py index f46fb7b..bbcf791 100644 --- a/workflow/scripts/pars_pht_aoecal.py +++ b/workflow/scripts/pars_pht_aoecal.py @@ -13,9 +13,9 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.FileKey import ChannelProcKey, ProcessingFileKey +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.FileKey import ChannelProcKey, ProcessingFileKey -from library.log import build_log from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import get_tcm_pulser_ids diff --git a/workflow/scripts/pars_pht_fast.py b/workflow/scripts/pars_pht_fast.py index cf90b94..1dfd1d6 100644 --- a/workflow/scripts/pars_pht_fast.py +++ b/workflow/scripts/pars_pht_fast.py @@ -10,9 +10,9 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.FileKey import ChannelProcKey, 
ProcessingFileKey +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.FileKey import ChannelProcKey, ProcessingFileKey -from library.log import build_log from pars_pht_aoecal import run_aoe_calibration from pars_pht_lqcal import run_lq_calibration from pars_pht_partcal import calibrate_partition diff --git a/workflow/scripts/pars_pht_lqcal.py b/workflow/scripts/pars_pht_lqcal.py index d470480..8826efd 100644 --- a/workflow/scripts/pars_pht_lqcal.py +++ b/workflow/scripts/pars_pht_lqcal.py @@ -11,9 +11,9 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.FileKey import ChannelProcKey, ProcessingFileKey +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.FileKey import ChannelProcKey, ProcessingFileKey -from library.log import build_log from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids diff --git a/workflow/scripts/pars_pht_partcal.py b/workflow/scripts/pars_pht_partcal.py index b726b96..b3e43c4 100644 --- a/workflow/scripts/pars_pht_partcal.py +++ b/workflow/scripts/pars_pht_partcal.py @@ -13,9 +13,9 @@ import pygama.math.histogram as pgh from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.FileKey import ChannelProcKey, ProcessingFileKey +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.FileKey import ChannelProcKey, ProcessingFileKey -from library.log import build_log from pygama.math.distributions import nb_poly from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration diff --git a/workflow/scripts/pars_pht_qc.py b/workflow/scripts/pars_pht_qc.py index e3fbd12..2ad477a 100644 --- a/workflow/scripts/pars_pht_qc.py +++ b/workflow/scripts/pars_pht_qc.py @@ -11,10 +11,10 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.convert_np import convert_dict_np_to_float +from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo.lh5 import ls -from library.convert_np import convert_dict_np_to_float -from library.log import build_log from pygama.pargen.data_cleaning import ( generate_cut_classifiers, get_keys, diff --git a/workflow/scripts/pars_pht_qc_phy.py b/workflow/scripts/pars_pht_qc_phy.py index c235064..791fa2b 100644 --- a/workflow/scripts/pars_pht_qc_phy.py +++ b/workflow/scripts/pars_pht_qc_phy.py @@ -12,10 +12,10 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.convert_np import convert_dict_np_to_float +from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo.lh5 import ls -from library.convert_np import convert_dict_np_to_float -from library.log import build_log from pygama.pargen.data_cleaning import ( generate_cut_classifiers, get_keys, diff --git a/workflow/scripts/pars_tcm_pulser.py b/workflow/scripts/pars_tcm_pulser.py index b7618d1..56700ec 100644 --- a/workflow/scripts/pars_tcm_pulser.py +++ b/workflow/scripts/pars_tcm_pulser.py @@ -6,8 +6,8 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.log import build_log from pygama.pargen.data_cleaning import get_tcm_pulser_ids argparser = argparse.ArgumentParser() 
diff --git a/workflow/scripts/library/FileKey.py b/workflow/src/legenddataflow/FileKey.py similarity index 100% rename from workflow/scripts/library/FileKey.py rename to workflow/src/legenddataflow/FileKey.py diff --git a/workflow/scripts/library/__init__.py b/workflow/src/legenddataflow/__init__.py similarity index 100% rename from workflow/scripts/library/__init__.py rename to workflow/src/legenddataflow/__init__.py diff --git a/workflow/scripts/library/cal_grouping.py b/workflow/src/legenddataflow/cal_grouping.py similarity index 100% rename from workflow/scripts/library/cal_grouping.py rename to workflow/src/legenddataflow/cal_grouping.py diff --git a/workflow/scripts/library/convert_np.py b/workflow/src/legenddataflow/convert_np.py similarity index 100% rename from workflow/scripts/library/convert_np.py rename to workflow/src/legenddataflow/convert_np.py diff --git a/workflow/scripts/library/create_pars_keylist.py b/workflow/src/legenddataflow/create_pars_keylist.py similarity index 100% rename from workflow/scripts/library/create_pars_keylist.py rename to workflow/src/legenddataflow/create_pars_keylist.py diff --git a/workflow/scripts/library/execenv.py b/workflow/src/legenddataflow/execenv.py similarity index 100% rename from workflow/scripts/library/execenv.py rename to workflow/src/legenddataflow/execenv.py diff --git a/workflow/scripts/library/log.py b/workflow/src/legenddataflow/log.py similarity index 100% rename from workflow/scripts/library/log.py rename to workflow/src/legenddataflow/log.py diff --git a/workflow/scripts/library/pars_loading.py b/workflow/src/legenddataflow/pars_loading.py similarity index 100% rename from workflow/scripts/library/pars_loading.py rename to workflow/src/legenddataflow/pars_loading.py diff --git a/workflow/scripts/library/patterns.py b/workflow/src/legenddataflow/patterns.py similarity index 100% rename from workflow/scripts/library/patterns.py rename to workflow/src/legenddataflow/patterns.py diff --git a/workflow/scripts/library/utils.py b/workflow/src/legenddataflow/utils.py similarity index 100% rename from workflow/scripts/library/utils.py rename to workflow/src/legenddataflow/utils.py From 3eca65dd54cd1c39aa4f0955c11e8a3d7d2598dd Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 29 Jan 2025 11:33:55 +0100 Subject: [PATCH 058/101] fix docs --- docs/Makefile | 5 +++-- pyproject.toml | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index ff41907..b85f221 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -17,8 +17,9 @@ apidoc: clean-apidoc --module-first \ --force \ --output-dir "$(SOURCEDIR)/api" \ - ../scripts \ - ../rules + ../workflow/src/legenddataflow \ + ../workflow/scripts \ + ../workflow/rules clean-apidoc: rm -rf "$(SOURCEDIR)/api" diff --git a/pyproject.toml b/pyproject.toml index cf0bc78..ee2f40c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,8 +16,11 @@ py-modules = [] [tool.setuptools.package-dir] "" = "workflow/src" +[tool.setuptools.packages.find] +where = ["workflow/src"] + [tool.setuptools_scm] -write_to = "workflow/src/legenddataflow_version.py" +write_to = "workflow/src/legenddataflow/_version.py" [project] name = "legend_dataflow" From e4df0d314497e29bc4f9fd4e9f0cc3f2c1f0a4bf Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 29 Jan 2025 11:54:18 +0100 Subject: [PATCH 059/101] pre-commit updates, minor format change in config.pkg_versions --- .pre-commit-config.yaml | 168 +++++++++++++------------ LICENSE.md | 9 +- config-lngs.yaml | 48 ++++--- 
config-nersc.yaml | 22 ++-- workflow/src/legenddataflow/execenv.py | 59 ++++----- 5 files changed, 160 insertions(+), 146 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 96cec14..1b3a8b9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,97 +4,101 @@ ci: autofix_commit_msg: "style: pre-commit fixes" repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: "v5.0.0" - hooks: - - id: check-added-large-files - - id: check-case-conflict - - id: check-merge-conflict - - id: check-symlinks - - id: check-yaml - - id: check-json - - id: check-toml - - id: check-docstring-first - - id: debug-statements - - id: end-of-file-fixer - - id: forbid-new-submodules - - id: mixed-line-ending - - id: name-tests-test - args: ["--pytest-test-first"] - - id: requirements-txt-fixer - - id: trailing-whitespace + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: "v5.0.0" + hooks: + - id: check-added-large-files + - id: check-case-conflict + - id: check-merge-conflict + - id: check-symlinks + - id: check-yaml + - id: check-json + - id: check-toml + - id: check-docstring-first + - id: debug-statements + - id: end-of-file-fixer + - id: forbid-new-submodules + - id: mixed-line-ending + - id: name-tests-test + args: ["--pytest-test-first"] + - id: requirements-txt-fixer + - id: trailing-whitespace -- repo: https://github.com/psf/black - rev: "24.10.0" - hooks: - - id: black-jupyter - args: ["--line-length", "99"] + - repo: https://github.com/psf/black + rev: "24.10.0" + hooks: + - id: black-jupyter + args: ["--line-length", "99"] -- repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.8.6" - hooks: - - id: ruff - args: ["--fix", "--show-fixes"] + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: "v0.8.6" + hooks: + - id: ruff + args: ["--fix", "--show-fixes"] -- repo: https://github.com/abravalheri/validate-pyproject - rev: "v0.23" - hooks: - - id: validate-pyproject - additional_dependencies: ["validate-pyproject-schema-store[all]"] + - repo: https://github.com/abravalheri/validate-pyproject + rev: "v0.23" + hooks: + - id: validate-pyproject + additional_dependencies: ["validate-pyproject-schema-store[all]"] -- repo: https://github.com/python-jsonschema/check-jsonschema - rev: "0.30.0" - hooks: - - id: check-dependabot - - id: check-github-workflows - - id: check-readthedocs + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: "0.30.0" + hooks: + - id: check-dependabot + - id: check-github-workflows + - id: check-readthedocs -- repo: https://github.com/pre-commit/mirrors-mypy - rev: "v1.14.1" - hooks: - - id: mypy - files: src - stages: [manual] + - repo: https://github.com/pre-commit/mirrors-mypy + rev: "v1.14.1" + hooks: + - id: mypy + files: src + stages: [manual] -- repo: https://github.com/kynan/nbstripout - rev: "0.8.1" - hooks: - - id: nbstripout - args: ["--drop-empty-cells", - "--extra-keys", "metadata.kernelspec metadata.language_info"] + - repo: https://github.com/kynan/nbstripout + rev: "0.8.1" + hooks: + - id: nbstripout + args: + [ + "--drop-empty-cells", + "--extra-keys", + "metadata.kernelspec metadata.language_info", + ] -- repo: https://github.com/mgedmin/check-manifest - rev: "0.50" - hooks: - - id: check-manifest - stages: [manual] + - repo: https://github.com/mgedmin/check-manifest + rev: "0.50" + hooks: + - id: check-manifest + stages: [manual] -- repo: https://github.com/codespell-project/codespell - rev: "v2.3.0" - hooks: - - id: codespell - args: ["-L", 
"nd,unparseable,compiletime,livetime,fom,puls"] + - repo: https://github.com/codespell-project/codespell + rev: "v2.3.0" + hooks: + - id: codespell + args: ["-L", "nd,unparseable,compiletime,livetime,fom,puls"] -- repo: https://github.com/shellcheck-py/shellcheck-py - rev: "v0.10.0.1" - hooks: - - id: shellcheck + - repo: https://github.com/shellcheck-py/shellcheck-py + rev: "v0.10.0.1" + hooks: + - id: shellcheck -- repo: https://github.com/pre-commit/pygrep-hooks - rev: "v1.10.0" - hooks: - - id: rst-backticks - - id: rst-directive-colons - - id: rst-inline-touching-normal + - repo: https://github.com/pre-commit/pygrep-hooks + rev: "v1.10.0" + hooks: + - id: rst-backticks + - id: rst-directive-colons + - id: rst-inline-touching-normal -- repo: https://github.com/pre-commit/mirrors-prettier - rev: "v4.0.0-alpha.8" - hooks: - - id: prettier - types_or: [json] + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v4.0.0-alpha.8" + hooks: + - id: prettier + types_or: [yaml, markdown, json] -- repo: https://github.com/snakemake/snakefmt - rev: v0.10.2 - hooks: - - id: snakefmt - files: Snakefile*|\.smk + - repo: https://github.com/snakemake/snakefmt + rev: v0.10.2 + hooks: + - id: snakefmt + files: Snakefile*|\.smk diff --git a/LICENSE.md b/LICENSE.md index 35d8ee3..b07a92a 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -2,10 +2,10 @@ The legend-dataflow package is licensed under the MIT "Expat" License: > Copyright (c) 2021: > -> Matteo Agostini -> Oliver Schulz -> George Marshall -> Luigi Pertoldi +> Matteo Agostini +> Oliver Schulz +> George Marshall +> Luigi Pertoldi > > Permission is hereby granted, free of charge, to any person obtaining a copy > of this software and associated documentation files (the "Software"), to deal @@ -24,4 +24,3 @@ The legend-dataflow package is licensed under the MIT "Expat" License: > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > SOFTWARE. 
-> diff --git a/config-lngs.yaml b/config-lngs.yaml index 901cac8..971399c 100644 --- a/config-lngs.yaml +++ b/config-lngs.yaml @@ -1,15 +1,18 @@ setups: l200: paths: - sandbox_path: '' - tier_daq: $_/generated/tier/daq - tier_raw_blind: '' + sandbox_path: /data1/shared/l200-p13/sandbox + tier_daq: $_/../daq/generated/tier/daq + tier_raw_blind: "" + workflow: $_/workflow + metadata: $_/inputs config: $_/inputs/dataprod/config par_overwrite: $_/inputs/dataprod/overrides chan_map: $_/inputs/hardware/configuration detector_db: $_/inputs/hardware/detectors + tier: $_/generated/tier tier_raw: $_/generated/tier/raw tier_tcm: $_/generated/tier/tcm @@ -22,6 +25,7 @@ setups: tier_pan: $_/generated/tier/pan tier_pet: $_/generated/tier/pet tier_skm: $_/generated/tier/skm + par: $_/generated/par par_raw: $_/generated/par/raw par_tcm: $_/generated/par/tcm @@ -31,41 +35,47 @@ setups: par_psp: $_/generated/par/psp par_pht: $_/generated/par/pht par_pet: $_/generated/par/pet + plt: $_/generated/plt log: $_/generated/log + tmp_plt: $_/generated/tmp/plt tmp_log: $_/generated/tmp/log tmp_filelists: $_/generated/tmp/filelists tmp_par: $_/generated/tmp/par + src: $_/software/python/src install: $_/software/python/install cache: $_/software/python/cache + table_format: raw: ch{ch:07d}/raw dsp: ch{ch:07d}/dsp psp: ch{ch:07d}/dsp hit: ch{ch:07d}/hit pht: ch{ch:07d}/hit - evt: '{grp}/evt' - pet: '{grp}/evt' - skm: '{grp}/skm' + evt: "{grp}/evt" + pet: "{grp}/evt" + skm: "{grp}/skm" tcm: hardware_tcm_1 + execenv: cmd: apptainer run arg: /data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif env: - HDF5_USE_FILE_LOCKING: 'False' - LGDO_BOUNDSCHECK: 'false' - DSPEED_BOUNDSCHECK: 'false' - PYGAMA_PARALLEL: 'false' - PYGAMA_FASTMATH: 'false' + PRODENV: $PRODENV + HDF5_USE_FILE_LOCKING: "False" + LGDO_BOUNDSCHECK: "false" + DSPEED_BOUNDSCHECK: "false" + PYGAMA_PARALLEL: "false" + PYGAMA_FASTMATH: "false" + DISABLE_TQDM: "True" + pkg_versions: - pygama: pygama==2.0.3 - pylegendmeta: pylegendmeta==1.1.0 - dspeed: dspeed==1.6.1 - legend-pydataobj: legend-pydataobj==1.9.0 - legend-daq2lh5: legend-daq2lh5==1.2.2 - tensorflow: tensorflow==2.17 - keras: keras==3.6.0 - jax: jax==0.4.30 + - pygama==2.0.* + - pylegendmeta==1.2.0a2 + - dspeed==1.6.* + - legend-pydataobj>=1.11.4 + - legend-daq2lh5==1.4.* + meta_version: v0.5.7 diff --git a/config-nersc.yaml b/config-nersc.yaml index 88b5156..f94d8ff 100644 --- a/config-nersc.yaml +++ b/config-nersc.yaml @@ -1,9 +1,9 @@ setups: l200: paths: - sandbox_path: '' + sandbox_path: "" tier_daq: $_/generated/tier/daq - tier_raw_blind: '' + tier_raw_blind: "" workflow: $_/workflow metadata: $_/inputs config: $_/inputs/dataprod/config @@ -44,19 +44,19 @@ setups: psp: ch{ch:07d}/dsp hit: ch{ch:07d}/hit pht: ch{ch:07d}/hit - evt: '{grp}/evt' - pet: '{grp}/evt' - skm: '{grp}/skm' + evt: "{grp}/evt" + pet: "{grp}/evt" + skm: "{grp}/skm" tcm: hardware_tcm_1 execenv: cmd: shifter - arg: ' --image legendexp/legend-base:latest' + arg: " --image legendexp/legend-base:latest" env: - HDF5_USE_FILE_LOCKING: 'FALSE' - LGDO_BOUNDSCHECK: 'false' - DSPEED_BOUNDSCHECK: 'false' - PYGAMA_PARALLEL: 'false' - PYGAMA_FASTMATH: 'false' + HDF5_USE_FILE_LOCKING: "FALSE" + LGDO_BOUNDSCHECK: "false" + DSPEED_BOUNDSCHECK: "false" + PYGAMA_PARALLEL: "false" + PYGAMA_FASTMATH: "false" pkg_versions: pygama: pygama==2.0.3 pylegendmeta: pylegendmeta==0.10.2 diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index c4e249d..5fce213 100644 --- 
a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -8,7 +8,8 @@ import subprocess from pathlib import Path -import yaml +import dbetto +from packaging.requirements import Requirement def dataprod() -> None: @@ -17,7 +18,7 @@ def dataprod() -> None: .. code-block:: console $ dataprod --help - $ dataprod load --help # help section for a specific sub-command + $ dataprod exec --help # help section for a specific sub-command """ parser = argparse.ArgumentParser( @@ -36,14 +37,14 @@ def dataprod() -> None: ) parser_install.set_defaults(func=install) - parser_load = subparsers.add_parser( - "load", help="load data production environment and execute a given command" + parser_exec = subparsers.add_parser( + "exec", help="load data production environment and execute a given command" ) - parser_load.add_argument("config_file", help="production cycle configuration file", type=str) - parser_load.add_argument( + parser_exec.add_argument("config_file", help="production cycle configuration file", type=str) + parser_exec.add_argument( "command", help="command to run within the container", type=str, nargs="+" ) - parser_load.set_defaults(func=load) + parser_exec.set_defaults(func=cmdexec) args = parser.parse_args() args.func(args) @@ -52,13 +53,14 @@ def dataprod() -> None: def install(args) -> None: """ This function installs user software in the data production environment. - The software packages should be specified in the config.yaml file with the format: + The software packages should be specified in the config.yaml file with the + format: ```yaml setups: - l200: - pkg_versions: - package_name: package_version + l200: + pkg_versions: + - python_package_spec ``` """ print(args.config_file) @@ -67,14 +69,13 @@ def install(args) -> None: raise RuntimeError(msg) config_file_dir = Path(args.config_file).resolve().parent - with Path(args.config_file).open() as r: - config_dic = yaml.safe_load(r) + config_dic = dbetto.AttrsDict(dbetto.utils.load_dict(args.config_file)) - exec_cmd = config_dic["setups"]["l200"]["execenv"]["cmd"] - exec_arg = config_dic["setups"]["l200"]["execenv"]["arg"] - path_src = config_dic["setups"]["l200"]["paths"]["src"] - path_install = config_dic["setups"]["l200"]["paths"]["install"] - path_cache = config_dic["setups"]["l200"]["paths"]["cache"] + exec_cmd = config_dic.setups.l200.execenv.cmd + exec_arg = config_dic.setups.l200.execenv.arg + path_src = config_dic.setups.l200.paths.src + path_install = config_dic.setups.l200.paths.install + path_cache = config_dic.setups.l200.paths.cache exec_cmd = string.Template(exec_cmd).substitute({"_": config_file_dir}) exec_arg = string.Template(exec_arg).substitute({"_": config_file_dir}) @@ -87,11 +88,12 @@ def install(args) -> None: shutil.rmtree(path_cache) pkg_list = "" - for pkg, pkg_version in config_dic["setups"]["l200"]["pkg_versions"].items(): + for spec in config_dic.setups.l200.pkg_versions: + pkg = Requirement(spec).name if (path_src / pkg).exists(): pkg_list += f" '{path_src / pkg}'" else: - pkg_list += f" '{pkg_version}'" + pkg_list += f" '{spec}'" cmd_expr = ( f"PYTHONUSERBASE={path_install} PIP_CACHE_DIR={path_cache} " @@ -101,23 +103,22 @@ def install(args) -> None: os.system(cmd_expr) -def load(args) -> None: +def cmdexec(args) -> None: """ This function loads the data production environment and executes a given command. 
""" if not Path(args.config_file).is_file(): - print("Error: config file does not exist") - exit() + msg = "config file is not a regular file" + raise RuntimeError(msg) config_file_dir = Path(args.config_file).resolve().parent - with Path(args.config_file).open() as r: - config_dic = yaml.safe_load(r) + config_dic = dbetto.AttrsDict(dbetto.utils.load_dict(args.config_file)) - exec_cmd = config_dic["setups"]["l200"]["execenv"]["cmd"] - exec_arg = config_dic["setups"]["l200"]["execenv"]["arg"] - env_vars = config_dic["setups"]["l200"]["execenv"]["env"] - path_install = config_dic["setups"]["l200"]["paths"]["install"] + exec_cmd = config_dic.setups.l200.execenv.cmd + exec_arg = config_dic.setups.l200.execenv.arg + env_vars = config_dic.setups.l200.execenv.env + path_install = config_dic.setups.l200.paths.install exec_cmd = string.Template(exec_cmd).substitute({"_": config_file_dir}) exec_arg = string.Template(exec_arg).substitute({"_": config_file_dir}) From 48a35e04f3bcfc51c9540eeb273cc865b8b72d39 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 29 Jan 2025 12:01:24 +0100 Subject: [PATCH 060/101] really fix the RTD build --- .readthedocs.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index ca8910f..103c066 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -17,6 +17,8 @@ build: --module-first --force --output-dir docs/source/api - scripts + workflow/scripts + workflow/src + workflow/rules - .venv/bin/python -m sphinx -T -b html -d docs/_build/doctrees -D language=en docs/source $READTHEDOCS_OUTPUT/html From 2af22db5cd320ba1f53bed688c6dc48f0cd7466e Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 29 Jan 2025 12:30:05 +0100 Subject: [PATCH 061/101] fix all package imports --- workflow/Snakefile | 65 ++++++++++--------------- workflow/Snakefile-build-raw | 41 ++++++---------- workflow/rules/ann.smk | 2 +- workflow/rules/blinding_calibration.smk | 2 +- workflow/rules/blinding_check.smk | 2 +- workflow/rules/chanlist_gen.smk | 6 +-- workflow/rules/common.smk | 40 +++++++-------- workflow/rules/dsp.smk | 8 +-- workflow/rules/dsp_pars_geds.smk | 4 +- workflow/rules/evt.smk | 4 +- workflow/rules/filelist_gen.smk | 28 +++++------ workflow/rules/hit.smk | 6 +-- workflow/rules/main.smk | 2 +- workflow/rules/pht.smk | 8 +-- workflow/rules/pht_fast.smk | 8 +-- workflow/rules/psp.smk | 6 +-- workflow/rules/psp_pars_geds.smk | 8 +-- workflow/rules/qc_phy.smk | 8 +-- workflow/rules/raw.smk | 6 +-- workflow/rules/skm.smk | 2 +- workflow/rules/tcm.smk | 2 +- 21 files changed, 112 insertions(+), 146 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index ba839be..011cb05 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -17,43 +17,28 @@ import glob from datetime import datetime from collections import OrderedDict import logging -from pylegendmeta import LegendMetadata - -import scripts.library as lib -from scripts.library.pars_loading import ParsCatalog -from scripts.library.patterns import get_pattern_tier -from scripts.library.utils import ( - subst_vars_in_snakemake_config, - runcmd, - config_path, - chan_map_path, - filelist_path, - metadata_path, - tmp_log_path, - pars_path, - det_status_path, -) - -# Set with `snakemake --configfile=/path/to/your/config.json` -# configfile: "have/to/specify/path/to/your/config.json" - -subst_vars_in_snakemake_config(workflow, config) + +from legendmeta import LegendMetadata +from legenddataflow import CalGrouping +from legenddataflow import utils + 
+utils.subst_vars_in_snakemake_config(workflow, config) check_in_cycle = True setup = config["setups"]["l200"] -configs = config_path(setup) -chan_maps = chan_map_path(setup) -meta = metadata_path(setup) -det_status = det_status_path(setup) -swenv = runcmd(setup) +configs = utils.config_path(setup) +chan_maps = utils.chan_map_path(setup) +meta = utils.metadata_path(setup) +det_status = utils.det_status_path(setup) +swenv = utils.runcmd(setup) basedir = workflow.basedir # wait for new pylegendmeta release # if not Path(meta).exists(): # meta = LegendMetadata() -# meta.checkout(config["setups"]["l200"]["meta_version"]) +# meta.checkout(config["setups"]["l200"]["legend_metadata_version"]) -part = lib.CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") +part = CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") wildcard_constraints: @@ -96,7 +81,7 @@ onstart: shell('{swenv} python3 -B -c "import ' + pkg + '"') # Log parameter catalogs in validity.jsonl files - hit_par_cat_file = Path(pars_path(setup)) / "hit" / "validity.yaml" + hit_par_cat_file = Path(utils.pars_path(setup)) / "hit" / "validity.yaml" if hit_par_cat_file.is_file(): hit_par_cat_file.unlink() try: @@ -105,7 +90,7 @@ onstart: except NameError: print("No hit parameter catalog found") - pht_par_cat_file = Path(pars_path(setup)) / "pht" / "validity.yaml" + pht_par_cat_file = Path(utils.pars_path(setup)) / "pht" / "validity.yaml" if pht_par_cat_file.is_file(): pht_par_cat_file.unlink() try: @@ -114,7 +99,7 @@ onstart: except NameError: print("No pht parameter catalog found") - dsp_par_cat_file = Path(pars_path(setup)) / "dsp" / "validity.yaml" + dsp_par_cat_file = Path(utils.pars_path(setup)) / "dsp" / "validity.yaml" if dsp_par_cat_file.is_file(): dsp_par_cat_file.unlink() try: @@ -123,7 +108,7 @@ onstart: except NameError: print("No dsp parameter catalog found") - psp_par_cat_file = Path(pars_path(setup)) / "psp" / "validity.yaml" + psp_par_cat_file = Path(utils.pars_path(setup)) / "psp" / "validity.yaml" if psp_par_cat_file.is_file(): psp_par_cat_file.unlink() try: @@ -155,24 +140,24 @@ onsuccess: os.remove(file) # remove filelists - files = glob.glob(os.path.join(filelist_path(setup), "*")) + files = glob.glob(os.path.join(utils.filelist_path(setup), "*")) for file in files: if os.path.isfile(file): os.remove(file) - if os.path.exists(filelist_path(setup)): - os.rmdir(filelist_path(setup)) + if os.path.exists(utils.filelist_path(setup)): + os.rmdir(utils.filelist_path(setup)) # remove logs - files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) + files = glob.glob(os.path.join(utils.tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): os.remove(file) - dirs = glob.glob(os.path.join(tmp_log_path(setup), "*")) + dirs = glob.glob(os.path.join(utils.tmp_log_path(setup), "*")) for d in dirs: if os.path.isdir(d): os.rmdir(d) - if os.path.exists(tmp_log_path(setup)): - os.rmdir(tmp_log_path(setup)) + if os.path.exists(utils.tmp_log_path(setup)): + os.rmdir(utils.tmp_log_path(setup)) rule gen_filelist: @@ -192,6 +177,6 @@ rule gen_filelist: analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), + temp(Path(utils.filelist_path(setup)) / "{label}-{tier}.filelist"), script: "scripts/write_filelist.py" diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw index 0dec789..763cb8a 100644 --- a/workflow/Snakefile-build-raw +++ b/workflow/Snakefile-build-raw @@ -8,39 +8,25 @@ to the blinded raw 
data. It handles: import os, sys from pathlib import Path -from scripts.util import patterns as patt -from scripts.util.utils import ( - subst_vars_in_snakemake_config, - runcmd, - config_path, - chan_map_path, - filelist_path, - pars_path, - metadata_path, - det_status_path, -) -from scripts.util.create_pars_keylist import ParsKeyResolve +from legenddataflow import patterns as patt +from legenddataflow import utils, ParsKeyResolve check_in_cycle = True -# Set with `snakemake --configfile=/path/to/your/config.json` -# configfile: "have/to/specify/path/to/your/config.json" - -subst_vars_in_snakemake_config(workflow, config) +utils.subst_vars_in_snakemake_config(workflow, config) setup = config["setups"]["l200"] -configs = config_path(setup) -chan_maps = chan_map_path(setup) -swenv = runcmd(setup) -meta = metadata_path(setup) -det_status = det_status_path(setup) +configs = utils.config_path(setup) +chan_maps = utils.chan_map_path(setup) +swenv = utils.runcmd(setup) +meta = utils.metadata_path(setup) +det_status = utils.det_status_path(setup) basedir = workflow.basedir # wait for new pylegendmeta release # if not Path(meta).exists(): # meta = LegendMetadata() -# meta.checkout(config["setups"]["l200"]["meta_version"]) -s +# meta.checkout(config["setups"]["l200"]["legend_metadata_version"]) wildcard_constraints: @@ -70,7 +56,7 @@ onstart: shell('{swenv} python3 -B -c "import daq2lh5 "') - raw_par_cat_file = Path(pars_path(setup)) / "raw" / "validity.yaml" + raw_par_cat_file = Path(utils.pars_path(setup)) / "raw" / "validity.yaml" if raw_par_cat_file.is_file(): raw_par_cat_file.unlink() try: @@ -83,7 +69,7 @@ onstart: onsuccess: print("Workflow finished, no error") shell("rm *.gen || true") - shell(f"rm {filelist_path(setup)}/* || true") + shell(f"rm {utils.filelist_path(setup)}/* || true") rule gen_filelist: @@ -96,7 +82,7 @@ rule gen_filelist: analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), + temp(Path(utils.filelist_path(setup)) / "{label}-{tier}.filelist"), script: "scripts/write_filelist.py" @@ -112,3 +98,6 @@ rule sort_data: patt.get_pattern_tier_daq(setup, extension="fcio"), shell: "mv {input} {output}" + + +# vim: filetype=snakemake diff --git a/workflow/rules/ann.smk b/workflow/rules/ann.smk index f1a47cd..2565514 100644 --- a/workflow/rules/ann.smk +++ b/workflow/rules/ann.smk @@ -4,7 +4,7 @@ to apply the ann and risetime cuts for psd. """ -from scripts.library.patterns import ( +from legenddataflow.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars, diff --git a/workflow/rules/blinding_calibration.smk b/workflow/rules/blinding_calibration.smk index d28072f..b8076d7 100644 --- a/workflow/rules/blinding_calibration.smk +++ b/workflow/rules/blinding_calibration.smk @@ -4,7 +4,7 @@ Snakemake rules for calibrating daq energy for blinding. Two steps: - combining all channels into single par file """ -from scripts.library.patterns import ( +from legenddataflow.patterns import ( get_pattern_pars, get_pattern_plts, get_pattern_pars_tmp_channel, diff --git a/workflow/rules/blinding_check.smk b/workflow/rules/blinding_check.smk index e556abb..b142c19 100644 --- a/workflow/rules/blinding_check.smk +++ b/workflow/rules/blinding_check.smk @@ -4,7 +4,7 @@ Snakemake rules for checking blinding. 
Two steps: - combining all channel check files into single check file """ -from scripts.library.patterns import ( +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/workflow/rules/chanlist_gen.smk b/workflow/rules/chanlist_gen.smk index 68c33e4..4e46f13 100644 --- a/workflow/rules/chanlist_gen.smk +++ b/workflow/rules/chanlist_gen.smk @@ -4,12 +4,12 @@ import os import random import re -from scripts.library.FileKey import ChannelProcKey -from scripts.library.patterns import ( +from legenddataflow.FileKey import ChannelProcKey +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, ) -from scripts.library.utils import filelist_path, runcmd +from legenddataflow.utils import filelist_path, runcmd def get_par_chanlist( diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 4f99d5c..17571e3 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -3,16 +3,10 @@ Helper functions for running data production """ from pathlib import Path -from scripts.library.patterns import ( - get_pattern_tier_daq_unsorted, - get_pattern_tier_daq, - get_pattern_tier, - par_overwrite_path, - get_pars_path, -) -from scripts.library import ProcessingFileKey +from legenddataflow import patterns as patt +from legenddataflow import ProcessingFileKey from dbetto.catalog import Catalog -from scripts.library import utils +from legenddataflow import utils def ro(path): @@ -22,14 +16,14 @@ def ro(path): def get_blinding_curve_file(wildcards): """func to get the blinding calibration curves from the overrides""" par_files = Catalog.get_files( - Path(par_overwrite_path(setup)) / "raw" / "validity.yaml", + Path(patt.par_overwrite_path(setup)) / "raw" / "validity.yaml", wildcards.timestamp, ) if isinstance(par_files, str): - return str(Path(par_overwrite_path(setup)) / "raw" / par_files) + return str(Path(patt.par_overwrite_path(setup)) / "raw" / par_files) else: return [ - str(Path(par_overwrite_path(setup)) / "raw" / par_file) + str(Path(patt.par_overwrite_path(setup)) / "raw" / par_file) for par_file in par_files ] @@ -37,12 +31,14 @@ def get_blinding_curve_file(wildcards): def get_blinding_check_file(wildcards): """func to get the right blinding check file""" par_files = Catalog.get_files( - Path(get_pars_path(setup, "raw")) / "validity.yaml", wildcards.timestamp + Path(patt.get_pars_path(setup, "raw")) / "validity.yaml", wildcards.timestamp ) if isinstance(par_files, str): - return Path(get_pars_path(setup, "raw")) / par_files + return Path(patt.get_pars_path(setup, "raw")) / par_files else: - return [Path(get_pars_path(setup, "raw")) / par_file for par_file in par_files] + return [ + Path(patt.get_pars_path(setup, "raw")) / par_file for par_file in par_files + ] def set_last_rule_name(workflow, new_name): @@ -71,19 +67,19 @@ def set_last_rule_name(workflow, new_name): def get_input_par_file(wildcards, tier, name): - par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" + par_overwrite_file = Path(patt.par_overwrite_path(setup)) / tier / "validity.yaml" pars_files_overwrite = Catalog.get_files( par_overwrite_file, wildcards.timestamp, ) for pars_file in pars_files_overwrite: if name in str(pars_file): - return Path(par_overwrite_path(setup)) / tier / pars_file + return Path(patt.par_overwrite_path(setup)) / tier / pars_file raise ValueError(f"Could not find model in {pars_files_overwrite}") def get_overwrite_file(tier, wildcards=None, 
timestamp=None, name=None): - par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" + par_overwrite_file = Path(patt.par_overwrite_path(setup)) / tier / "validity.yaml" if timestamp is not None: pars_files_overwrite = Catalog.get_files( par_overwrite_file, @@ -101,7 +97,7 @@ def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): out_files = [] for pars_file in pars_files_overwrite: if fullname in str(pars_file): - out_files.append(Path(par_overwrite_path(setup)) / tier / pars_file) + out_files.append(Path(patt.par_overwrite_path(setup)) / tier / pars_file) if len(out_files) == 0: raise ValueError(f"Could not find name in {pars_files_overwrite}") else: @@ -113,8 +109,8 @@ def get_search_pattern(tier): This func gets the search pattern for the relevant tier passed. """ if tier == "daq": - return get_pattern_tier_daq_unsorted(setup, extension="*") + return patt.get_pattern_tier_daq_unsorted(setup, extension="*") elif tier == "raw": - return get_pattern_tier_daq(setup, extension="*") + return patt.get_pattern_tier_daq(setup, extension="*") else: - return get_pattern_tier(setup, "raw", check_in_cycle=False) + return patt.get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index f4f8487..501ed52 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -4,11 +4,11 @@ Snakemake rules for processing dsp tier. - running dsp over all channels using par file """ -from scripts.library.pars_loading import ParsCatalog -from scripts.library.create_pars_keylist import ParsKeyResolve +from legenddataflow.pars_loading import ParsCatalog +from legenddataflow.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.library.create_pars_keylist import ParsKeyResolve -from scripts.library.patterns import ( +from legenddataflow.create_pars_keylist import ParsKeyResolve +from legenddataflow.patterns import ( get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index 7f16c9e..f526d6b 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -4,8 +4,8 @@ Snakemake rules for building dsp pars for HPGes, before running build_dsp() - extraction of energy filter parameters and charge trapping correction for each channel from cal data """ -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.patterns import ( +from legenddataflow.create_pars_keylist import pars_key_resolve +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/workflow/rules/evt.smk b/workflow/rules/evt.smk index 4e96a85..d14b8cb 100644 --- a/workflow/rules/evt.smk +++ b/workflow/rules/evt.smk @@ -2,8 +2,8 @@ Snakemake rules for processing evt tier. 
""" -from scripts.library.pars_loading import ParsCatalog -from scripts.library.patterns import ( +from legenddataflow.pars_loading import ParsCatalog +from legenddataflow.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars, diff --git a/workflow/rules/filelist_gen.smk b/workflow/rules/filelist_gen.smk index 5d1f928..32d6175 100644 --- a/workflow/rules/filelist_gen.smk +++ b/workflow/rules/filelist_gen.smk @@ -2,12 +2,8 @@ import glob import json, yaml from pathlib import Path -from scripts.library.FileKey import FileKey, run_grouper -from scripts.library.patterns import ( - get_pattern_tier, - get_pattern_tier_raw_blind, - get_pattern_tier_daq, -) +from legenddataflow.FileKey import FileKey, run_grouper +from legenddataflow import patterns as patt concat_datatypes = ["phy"] concat_tiers = ["skm", "pet_concat", "evt_concat"] @@ -116,15 +112,15 @@ def get_pattern(setup, tier): as only phy files are taken to skm others are only taken to pet """ if tier == "blind": - fn_pattern = get_pattern_tier(setup, "raw", check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(setup, "raw", check_in_cycle=False) elif tier in ("skm", "pet_concat"): - fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(setup, "pet", check_in_cycle=False) elif tier == "evt_concat": - fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(setup, "evt", check_in_cycle=False) elif tier == "daq": - fn_pattern = get_pattern_tier_daq(setup, extension="{ext}") + fn_pattern = patt.get_pattern_tier_daq(setup, extension="{ext}") else: - fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(setup, tier, check_in_cycle=False) return fn_pattern @@ -132,15 +128,15 @@ def concat_phy_filenames(setup, phy_filenames, tier): """ This function concatenates the files from the same run together """ - fn_pattern = get_pattern(setup, tier) + fn_pattern = patt.get_pattern(setup, tier) # group files by run - sorted_phy_filenames = run_grouper(phy_filenames) + sorted_phy_filenames = patt.run_grouper(phy_filenames) phy_filenames = [] for run in sorted_phy_filenames: key = FileKey.get_filekey_from_pattern(run[0], fn_pattern) out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, tier, check_in_cycle=False) + key, patt.get_pattern_tier(setup, tier, check_in_cycle=False) )[0] phy_filenames.append(out_key) @@ -181,11 +177,11 @@ def build_filelist( else: if tier == "blind" and _key.datatype in blind_datatypes: filename = FileKey.get_path_from_filekey( - _key, get_pattern_tier_raw_blind(setup) + _key, patt.get_pattern_tier_raw_blind(setup) ) elif tier == "skm": filename = FileKey.get_path_from_filekey( - _key, get_pattern_tier(setup, "pet", check_in_cycle=False) + _key, patt.get_pattern_tier(setup, "pet", check_in_cycle=False) ) elif tier == "daq": filename = FileKey.get_path_from_filekey( diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index 5ea14ff..0af7590 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -6,10 +6,10 @@ Snakemake rules for processing hit tier. 
This is done in 4 steps: - running build hit over all channels using par file """ -from scripts.library.pars_loading import ParsCatalog -from scripts.library.create_pars_keylist import ParsKeyResolve +from legenddataflow.pars_loading import ParsCatalog +from legenddataflow.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.library.patterns import ( +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/workflow/rules/main.smk b/workflow/rules/main.smk index f227f17..e0d886e 100644 --- a/workflow/rules/main.smk +++ b/workflow/rules/main.smk @@ -1,6 +1,6 @@ import os from datetime import datetime -from scripts.library.utils import ( +from legenddataflow.utils import ( filelist_path, log_path, tmp_par_path, diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index 239e3c5..27e4f81 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -6,11 +6,11 @@ Snakemake rules for processing pht (partition hit) tier data. This is done in 4 - running build hit over all channels using par file """ -from scripts.library.pars_loading import ParsCatalog -from scripts.library.create_pars_keylist import ParsKeyResolve +from legenddataflow.pars_loading import ParsCatalog +from legenddataflow.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.library.utils import filelist_path, set_last_rule_name -from scripts.library.patterns import ( +from legenddataflow.utils import filelist_path, set_last_rule_name +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/workflow/rules/pht_fast.smk b/workflow/rules/pht_fast.smk index b177f12..75d8e7e 100644 --- a/workflow/rules/pht_fast.smk +++ b/workflow/rules/pht_fast.smk @@ -1,7 +1,7 @@ -from scripts.library.pars_loading import ParsCatalog -from scripts.library.create_pars_keylist import ParsKeyResolve -from scripts.library.utils import filelist_path, set_last_rule_name -from scripts.library.patterns import ( +from legenddataflow.pars_loading import ParsCatalog +from legenddataflow.create_pars_keylist import ParsKeyResolve +from legenddataflow.utils import filelist_path, set_last_rule_name +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index dc0cfe5..d55fbcc 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -4,10 +4,10 @@ Snakemake rules for processing psp (partition dsp) tier data. - running build hit over all channels using par file """ -from scripts.library.pars_loading import ParsCatalog -from scripts.library.create_pars_keylist import ParsKeyResolve +from legenddataflow.pars_loading import ParsCatalog +from legenddataflow.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.library.patterns import ( +from legenddataflow.patterns import ( get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk index 8d3d2c8..9e14cad 100644 --- a/workflow/rules/psp_pars_geds.smk +++ b/workflow/rules/psp_pars_geds.smk @@ -4,10 +4,10 @@ Snakemake rules for processing psp (partition dsp) tier data. 
- extraction of psd calibration parameters and partition level energy fitting for each channel over whole partition from cal data """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import par_psp_path, par_dsp_path, set_last_rule_name -from scripts.util.patterns import ( +from legenddataflow.pars_loading import pars_catalog +from legenddataflow.create_pars_keylist import pars_key_resolve +from legenddataflow.utils import par_psp_path, par_dsp_path, set_last_rule_name +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/workflow/rules/qc_phy.smk b/workflow/rules/qc_phy.smk index d7a10f4..982ab4e 100644 --- a/workflow/rules/qc_phy.smk +++ b/workflow/rules/qc_phy.smk @@ -1,7 +1,7 @@ -from scripts.library.pars_loading import ParsCatalog -from scripts.library.create_pars_keylist import ParsKeyResolve -from scripts.library.utils import filelist_path, set_last_rule_name -from scripts.library.patterns import ( +from legenddataflow.pars_loading import ParsCatalog +from legenddataflow.create_pars_keylist import ParsKeyResolve +from legenddataflow.utils import filelist_path, set_last_rule_name +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/workflow/rules/raw.smk b/workflow/rules/raw.smk index 17d1e3b..f647095 100644 --- a/workflow/rules/raw.smk +++ b/workflow/rules/raw.smk @@ -1,12 +1,12 @@ -from scripts.library.patterns import ( +from legenddataflow.patterns import ( get_pattern_tier_daq_unsorted, get_pattern_tier_daq, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, ) -from scripts.library.utils import set_last_rule_name -from scripts.library.create_pars_keylist import ParsKeyResolve +from legenddataflow.utils import set_last_rule_name +from legenddataflow.create_pars_keylist import ParsKeyResolve raw_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], diff --git a/workflow/rules/skm.smk b/workflow/rules/skm.smk index 404b81b..d3c5d51 100644 --- a/workflow/rules/skm.smk +++ b/workflow/rules/skm.smk @@ -2,7 +2,7 @@ Snakemake rules for processing skm tier. """ -from scripts.library.patterns import ( +from legenddataflow.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars, diff --git a/workflow/rules/tcm.smk b/workflow/rules/tcm.smk index 941455d..6fa85a9 100644 --- a/workflow/rules/tcm.smk +++ b/workflow/rules/tcm.smk @@ -2,7 +2,7 @@ Snakemake file containing the rules for generating the tcm """ -from scripts.library.patterns import ( +from legenddataflow.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars_tmp_channel, From cf9e6bd79bbc9c59a60976c180b41b88aa529c1d Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 29 Jan 2025 13:57:03 +0100 Subject: [PATCH 062/101] update profiles --- .gitignore | 2 ++ config-lngs.yaml | 3 ++- workflow/profiles/default/config.yaml | 1 - workflow/profiles/{build-raw => lngs-build-raw}/config.yaml | 2 +- workflow/profiles/{legend-data => lngs}/config.yaml | 2 +- 5 files changed, 6 insertions(+), 4 deletions(-) rename workflow/profiles/{build-raw => lngs-build-raw}/config.yaml (84%) rename workflow/profiles/{legend-data => lngs}/config.yaml (83%) diff --git a/.gitignore b/.gitignore index a904f40..b4586b6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +.snakemake + # New additions go at the top! 
*.c .DS_Store diff --git a/config-lngs.yaml b/config-lngs.yaml index 971399c..b14c913 100644 --- a/config-lngs.yaml +++ b/config-lngs.yaml @@ -11,6 +11,7 @@ setups: config: $_/inputs/dataprod/config par_overwrite: $_/inputs/dataprod/overrides chan_map: $_/inputs/hardware/configuration + detector_status: $_/inputs/datasets detector_db: $_/inputs/hardware/detectors tier: $_/generated/tier @@ -78,4 +79,4 @@ setups: - legend-pydataobj>=1.11.4 - legend-daq2lh5==1.4.* - meta_version: v0.5.7 + legend_metadata_version: v0.5.7 diff --git a/workflow/profiles/default/config.yaml b/workflow/profiles/default/config.yaml index 53a11cd..ba92572 100644 --- a/workflow/profiles/default/config.yaml +++ b/workflow/profiles/default/config.yaml @@ -1,5 +1,4 @@ cores: all -configfile: config.json snakefile: ./workflow/Snakefile keep-going: true rerun-incomplete: true diff --git a/workflow/profiles/build-raw/config.yaml b/workflow/profiles/lngs-build-raw/config.yaml similarity index 84% rename from workflow/profiles/build-raw/config.yaml rename to workflow/profiles/lngs-build-raw/config.yaml index 4525deb..73b5cb5 100644 --- a/workflow/profiles/build-raw/config.yaml +++ b/workflow/profiles/lngs-build-raw/config.yaml @@ -3,7 +3,7 @@ restart-times: 2 max-jobs-per-second: 1 resources: - mem_swap=3500 -configfile: config.json +configfile: config-lngs.yaml snakefile: ./workflow/Snakefile-build-raw keep-going: true rerun-incomplete: true diff --git a/workflow/profiles/legend-data/config.yaml b/workflow/profiles/lngs/config.yaml similarity index 83% rename from workflow/profiles/legend-data/config.yaml rename to workflow/profiles/lngs/config.yaml index 364bdb1..1f27969 100644 --- a/workflow/profiles/legend-data/config.yaml +++ b/workflow/profiles/lngs/config.yaml @@ -3,7 +3,7 @@ restart-times: 2 max-jobs-per-second: 1 resources: - mem_swap=3500 -configfile: config.json +configfile: config-lngs.yaml snakefile: ./workflow/Snakefile keep-going: true rerun-incomplete: true From f4bbffa6091d790501bf025cf3e2ab39819d8bf0 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 29 Jan 2025 13:57:23 +0100 Subject: [PATCH 063/101] switch to subrocess library --- workflow/src/legenddataflow/execenv.py | 37 ++++++++++++++++++++------ 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index 5fce213..fa91400 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -87,20 +87,41 @@ def install(args) -> None: shutil.rmtree(path_install) shutil.rmtree(path_cache) - pkg_list = "" + pkg_list = [] for spec in config_dic.setups.l200.pkg_versions: pkg = Requirement(spec).name if (path_src / pkg).exists(): - pkg_list += f" '{path_src / pkg}'" + pkg_list.append(str(path_src / pkg)) else: - pkg_list += f" '{spec}'" + pkg_list.append(spec) + + cmd_base = [ + *(exec_cmd.split()), + exec_arg, + "python3", + "-B", + "-m", + "pip", + "install", + "--no-warn-script-location", + ] + + cmd_expr = cmd_base + pkg_list + cmdenv = { + "PYTHONUSERBASE": path_install, + "PIP_CACHE_DIR": path_cache, + } + + print( + "INFO: running:", + " ".join([f"{k}={v}" for k, v in cmdenv.items()]) + " " + " ".join(cmd_expr), + ) - cmd_expr = ( - f"PYTHONUSERBASE={path_install} PIP_CACHE_DIR={path_cache} " - f"{exec_cmd} {exec_arg} python3 -B -m pip install --no-warn-script-location {pkg_list}" + subprocess.run( + cmd_expr, + env=cmdenv, + check=True, ) - print("INFO: running:", cmd_expr) - os.system(cmd_expr) def cmdexec(args) -> None: From 
f7dbc325004ae87977f9600ae1d1dab7b817c5e2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 29 Jan 2025 18:48:51 +0100 Subject: [PATCH 064/101] remove smk functions --- pyproject.toml | 5 ++- workflow/src/legenddataflow/FileKey.py | 52 +++++++++++++++----------- 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ee2f40c..df67b42 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,12 +51,15 @@ dynamic = ["version"] dependencies = [ "dbetto>=1.0.5", - "snakemake>=8.16", "pylegendmeta==1.2.0a2", ] [project.optional-dependencies] +full = [ +"snakemake>=8.16", +] no_container = [ + "legend_dataflow[full]", "pygama", "dspeed", "pylegendmeta==1.2.0a2", diff --git a/workflow/src/legenddataflow/FileKey.py b/workflow/src/legenddataflow/FileKey.py index ca4573c..63a1842 100644 --- a/workflow/src/legenddataflow/FileKey.py +++ b/workflow/src/legenddataflow/FileKey.py @@ -3,11 +3,10 @@ """ import re +import string from collections import namedtuple from pathlib import Path -import snakemake as smk - from .patterns import ( full_channel_pattern_with_extension, get_pattern_tier, @@ -20,6 +19,18 @@ # +def regex_from_filepattern(filepattern): + f = [] + last = 0 + for match in re.compile(r"\{(?P[\w]+)\}").finditer(filepattern): + wildcard = match.group("name") + f.append(f"(?P={wildcard})") + last = match.end() + f.append(re.escape(filepattern[last:])) + f.append("$") + return "".join(f) + + class FileKey(namedtuple("FileKey", ["experiment", "period", "run", "datatype", "timestamp"])): __slots__ = () @@ -51,18 +62,12 @@ def get_filekey_from_filename(cls, filename): @classmethod def get_filekey_from_pattern(cls, filename, pattern=None): - if pattern is None: - try: - key_pattern_rx = re.compile(smk.io.regex_from_filepattern(cls.key_pattern)) - except AttributeError: - key_pattern_rx = re.compile(smk.io.regex(cls.key_pattern)) - else: - if isinstance(pattern, Path): - pattern = pattern.as_posix() - try: - key_pattern_rx = re.compile(smk.io.regex_from_filepattern(pattern)) - except AttributeError: - key_pattern_rx = re.compile(smk.io.regex(pattern)) + if isinstance(pattern, Path): + pattern = pattern.as_posix() + + key_pattern_rx = re.compile( + regex_from_filepattern(cls.key_pattern if pattern is None else pattern) + ) if key_pattern_rx.match(filename) is None: return None @@ -93,11 +98,14 @@ def parse_keypart(cls, keypart): d[key] = "*" return cls(**d) + def expand(self, file_pattern, **kwargs): + wildcard_dict = dict(**self._asdict(), **kwargs) + formatter = string.Formatter() + return [formatter.vformat(file_pattern, (), wildcard_dict)] + def get_path_from_filekey(self, pattern, **kwargs): - if isinstance(pattern, Path): - pattern = pattern.as_posix() if kwargs is None: - return smk.io.expand(pattern, **self._asdict()) + return self.expand(pattern, **kwargs) else: for entry, value in kwargs.items(): if isinstance(value, dict): @@ -105,7 +113,7 @@ def get_path_from_filekey(self, pattern, **kwargs): kwargs[entry] = value[next(iter(set(value).intersection(self._list())))] else: kwargs.pop(entry) - return smk.io.expand(pattern, **self._asdict(), **kwargs) + return self.expand(pattern, **kwargs) # get_path_from_key @classmethod @@ -172,7 +180,7 @@ def get_path_from_filekey(self, pattern, **kwargs): if not isinstance(pattern, str): pattern = pattern(self.tier, self.identifier) if kwargs is None: - return smk.io.expand(pattern, **self._asdict()) + return self.expand(pattern, **kwargs) else: for entry, value in kwargs.items(): if isinstance(value, dict): @@ 
-180,7 +188,7 @@ def get_path_from_filekey(self, pattern, **kwargs): kwargs[entry] = value[next(iter(set(value).intersection(self._list())))] else: kwargs.pop(entry) - return smk.io.expand(pattern, **self._asdict(), **kwargs) + return self.expand(pattern, **kwargs) class ChannelProcKey(FileKey): @@ -211,7 +219,9 @@ def get_channel_files(keypart, par_pattern, chan_list): for chan in chan_list: wildcards_dict = d._asdict() wildcards_dict.pop("channel") - file = smk.io.expand(par_pattern, **wildcards_dict, channel=chan)[0] + formatter = string.Formatter() + wildcards_dict["channel"] = chan + file = formatter.vformat(par_pattern, (), wildcards_dict) filenames.append(file) return filenames From 4e1cfd5c81b303fbdcf7ee5f7ca3eb2ac9c99c30 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 12:23:04 +0100 Subject: [PATCH 065/101] rewrite execenv functions to use uv. remove setups/l200 prefix in config --- .gitignore | 1 + .pre-commit-config.yaml | 2 +- config-lngs.yaml | 140 +++++++++-------- pyproject.toml | 49 +++--- workflow/Snakefile | 43 +++--- workflow/Snakefile-build-raw | 29 ++-- workflow/src/legenddataflow/__init__.py | 10 +- workflow/src/legenddataflow/execenv.py | 197 +++++++++++++++++------- workflow/src/legenddataflow/utils.py | 15 -- 9 files changed, 279 insertions(+), 207 deletions(-) diff --git a/.gitignore b/.gitignore index b4586b6..48a1541 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .snakemake +workflow/src/legenddataflow/_version.py # New additions go at the top! *.c diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1b3a8b9..e369b65 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -95,7 +95,7 @@ repos: rev: "v4.0.0-alpha.8" hooks: - id: prettier - types_or: [yaml, markdown, json] + types_or: [yaml, markdown, json, toml] - repo: https://github.com/snakemake/snakefmt rev: v0.10.2 diff --git a/config-lngs.yaml b/config-lngs.yaml index b14c913..faa456b 100644 --- a/config-lngs.yaml +++ b/config-lngs.yaml @@ -1,82 +1,80 @@ -setups: - l200: - paths: - sandbox_path: /data1/shared/l200-p13/sandbox - tier_daq: $_/../daq/generated/tier/daq - tier_raw_blind: "" +paths: + sandbox_path: /data1/shared/l200-p13/sandbox + tier_daq: $_/../daq/generated/tier/daq + tier_raw_blind: "" - workflow: $_/workflow + workflow: $_/workflow - metadata: $_/inputs - config: $_/inputs/dataprod/config - par_overwrite: $_/inputs/dataprod/overrides - chan_map: $_/inputs/hardware/configuration - detector_status: $_/inputs/datasets - detector_db: $_/inputs/hardware/detectors + metadata: $_/inputs + config: $_/inputs/dataprod/config + par_overwrite: $_/inputs/dataprod/overrides + chan_map: $_/inputs/hardware/configuration + detector_status: $_/inputs/datasets + detector_db: $_/inputs/hardware/detectors - tier: $_/generated/tier - tier_raw: $_/generated/tier/raw - tier_tcm: $_/generated/tier/tcm - tier_dsp: $_/generated/tier/dsp - tier_hit: $_/generated/tier/hit - tier_ann: $_/generated/tier/ann - tier_evt: $_/generated/tier/evt - tier_psp: $_/generated/tier/psp - tier_pht: $_/generated/tier/pht - tier_pan: $_/generated/tier/pan - tier_pet: $_/generated/tier/pet - tier_skm: $_/generated/tier/skm + tier: $_/generated/tier + tier_raw: $_/generated/tier/raw + tier_tcm: $_/generated/tier/tcm + tier_dsp: $_/generated/tier/dsp + tier_hit: $_/generated/tier/hit + tier_ann: $_/generated/tier/ann + tier_evt: $_/generated/tier/evt + tier_psp: $_/generated/tier/psp + tier_pht: $_/generated/tier/pht + tier_pan: $_/generated/tier/pan + tier_pet: 
$_/generated/tier/pet + tier_skm: $_/generated/tier/skm - par: $_/generated/par - par_raw: $_/generated/par/raw - par_tcm: $_/generated/par/tcm - par_dsp: $_/generated/par/dsp - par_hit: $_/generated/par/hit - par_evt: $_/generated/par/evt - par_psp: $_/generated/par/psp - par_pht: $_/generated/par/pht - par_pet: $_/generated/par/pet + par: $_/generated/par + par_raw: $_/generated/par/raw + par_tcm: $_/generated/par/tcm + par_dsp: $_/generated/par/dsp + par_hit: $_/generated/par/hit + par_evt: $_/generated/par/evt + par_psp: $_/generated/par/psp + par_pht: $_/generated/par/pht + par_pet: $_/generated/par/pet - plt: $_/generated/plt - log: $_/generated/log + plt: $_/generated/plt + log: $_/generated/log - tmp_plt: $_/generated/tmp/plt - tmp_log: $_/generated/tmp/log - tmp_filelists: $_/generated/tmp/filelists - tmp_par: $_/generated/tmp/par + tmp_plt: $_/generated/tmp/plt + tmp_log: $_/generated/tmp/log + tmp_filelists: $_/generated/tmp/filelists + tmp_par: $_/generated/tmp/par - src: $_/software/python/src - install: $_/software/python/install - cache: $_/software/python/cache + src: $_/software/python/src + install: $_/software/python/install + cache: $_/software/python/cache - table_format: - raw: ch{ch:07d}/raw - dsp: ch{ch:07d}/dsp - psp: ch{ch:07d}/dsp - hit: ch{ch:07d}/hit - pht: ch{ch:07d}/hit - evt: "{grp}/evt" - pet: "{grp}/evt" - skm: "{grp}/skm" - tcm: hardware_tcm_1 +table_format: + raw: ch{ch:07d}/raw + dsp: ch{ch:07d}/dsp + psp: ch{ch:07d}/dsp + hit: ch{ch:07d}/hit + pht: ch{ch:07d}/hit + evt: "{grp}/evt" + pet: "{grp}/evt" + skm: "{grp}/skm" + tcm: hardware_tcm_1 - execenv: - cmd: apptainer run - arg: /data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif - env: - PRODENV: $PRODENV - HDF5_USE_FILE_LOCKING: "False" - LGDO_BOUNDSCHECK: "false" - DSPEED_BOUNDSCHECK: "false" - PYGAMA_PARALLEL: "false" - PYGAMA_FASTMATH: "false" - DISABLE_TQDM: "True" +execenv: + cmd: apptainer run + arg: /data2/public/prodenv/containers/legendexp_legend-base_latest_20241110203225.sif + env: + PRODENV: $PRODENV + HDF5_USE_FILE_LOCKING: "False" + LGDO_BOUNDSCHECK: "false" + DSPEED_BOUNDSCHECK: "false" + PYGAMA_PARALLEL: "false" + PYGAMA_FASTMATH: "false" + DISABLE_TQDM: "True" - pkg_versions: - - pygama==2.0.* - - pylegendmeta==1.2.0a2 - - dspeed==1.6.* - - legend-pydataobj>=1.11.4 - - legend-daq2lh5==1.4.* +pkg_versions: + - pygama==2.0.* + - pylegendmeta==1.2.0a2 + - dspeed==1.6.* + - legend-pydataobj>=1.11.4 + - legend-daq2lh5==1.4.* - legend_metadata_version: v0.5.7 +legend_metadata_version: v0.5.7 diff --git a/pyproject.toml b/pyproject.toml index df67b42..944cfe5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,10 @@ exclude = ["generated", "inputs", "software", "workflow"] [build-system] -requires = ["setuptools>=61.0"] +requires = [ + "setuptools>=61.2", + "setuptools_scm[toml]>=7" +] build-backend = "setuptools.build_meta" [tool.setuptools] @@ -49,42 +52,38 @@ readme = "README.md" requires-python = ">=3.11" dynamic = ["version"] +# these are the dependencies strictly required by legend-dataflow dependencies = [ "dbetto>=1.0.5", "pylegendmeta==1.2.0a2", ] [project.optional-dependencies] -full = [ -"snakemake>=8.16", -] -no_container = [ - "legend_dataflow[full]", - "pygama", - "dspeed", - "pylegendmeta==1.2.0a2", - "legend-pydataobj", - "legend-daq2lh5", +# these are needed to run the data production +dataprod = [ + "snakemake>=8.16", + "pygama", + "dspeed", + "pylegendmeta==1.2.0a2", + "legend-pydataobj", + "legend-daq2lh5", ] test = [ - 
"legend_dataflow[no_container]", - "pytest >=6", - "pytest-cov >=3", + "legend_dataflow[dataprod]", + "pytest>=6", + "pytest-cov>=3", ] dev = [ - "legend_dataflow[no_container]", - "pytest >=6", - "pytest-cov >=3", - "pre-commit", + "legend_dataflow[dataprod,test]", + "pre-commit", ] docs = [ - "legend_dataflow[no_container]", - "sphinx>=7.0", - "myst_parser>=0.13", - "sphinx_inline_tabs", - "sphinx_copybutton", - "sphinx_autodoc_typehints", - "furo>=2023.08.17", + "sphinx>=7.0", + "myst_parser>=0.13", + "sphinx_inline_tabs", + "sphinx_copybutton", + "sphinx_autodoc_typehints", + "furo>=2023.08.17", ] [project.scripts] diff --git a/workflow/Snakefile b/workflow/Snakefile index 011cb05..9fa6950 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -25,20 +25,19 @@ from legenddataflow import utils utils.subst_vars_in_snakemake_config(workflow, config) check_in_cycle = True -setup = config["setups"]["l200"] -configs = utils.config_path(setup) -chan_maps = utils.chan_map_path(setup) -meta = utils.metadata_path(setup) -det_status = utils.det_status_path(setup) -swenv = utils.runcmd(setup) +configs = utils.config_path(config) +chan_maps = utils.chan_map_path(config) +meta = utils.metadata_path(config) +det_status = utils.det_status_path(config) +swenv = utils.runcmd(config) basedir = workflow.basedir # wait for new pylegendmeta release # if not Path(meta).exists(): # meta = LegendMetadata() -# meta.checkout(config["setups"]["l200"]["legend_metadata_version"]) +# meta.checkout(config["configs"]["l200"]["legend_metadata_version"]) -part = CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") +part = CalGrouping(config, Path(det_status) / "cal_groupings.yaml") wildcard_constraints: @@ -81,7 +80,7 @@ onstart: shell('{swenv} python3 -B -c "import ' + pkg + '"') # Log parameter catalogs in validity.jsonl files - hit_par_cat_file = Path(utils.pars_path(setup)) / "hit" / "validity.yaml" + hit_par_cat_file = Path(utils.pars_path(config)) / "hit" / "validity.yaml" if hit_par_cat_file.is_file(): hit_par_cat_file.unlink() try: @@ -90,7 +89,7 @@ onstart: except NameError: print("No hit parameter catalog found") - pht_par_cat_file = Path(utils.pars_path(setup)) / "pht" / "validity.yaml" + pht_par_cat_file = Path(utils.pars_path(config)) / "pht" / "validity.yaml" if pht_par_cat_file.is_file(): pht_par_cat_file.unlink() try: @@ -99,7 +98,7 @@ onstart: except NameError: print("No pht parameter catalog found") - dsp_par_cat_file = Path(utils.pars_path(setup)) / "dsp" / "validity.yaml" + dsp_par_cat_file = Path(utils.pars_path(config)) / "dsp" / "validity.yaml" if dsp_par_cat_file.is_file(): dsp_par_cat_file.unlink() try: @@ -108,7 +107,7 @@ onstart: except NameError: print("No dsp parameter catalog found") - psp_par_cat_file = Path(utils.pars_path(setup)) / "psp" / "validity.yaml" + psp_par_cat_file = Path(utils.pars_path(config)) / "psp" / "validity.yaml" if psp_par_cat_file.is_file(): psp_par_cat_file.unlink() try: @@ -121,7 +120,7 @@ onstart: onsuccess: from snakemake.report import auto_report - rep_dir = f"{log_path(setup)}/report-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}" + rep_dir = f"{log_path(config)}/report-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}" Path(rep_dir).mkdir(parents=True, exist_ok=True) # auto_report(workflow.persistence.dag, f"{rep_dir}/report.html") @@ -140,24 +139,24 @@ onsuccess: os.remove(file) # remove filelists - files = glob.glob(os.path.join(utils.filelist_path(setup), "*")) + files = glob.glob(os.path.join(utils.filelist_path(config), "*")) 
for file in files: if os.path.isfile(file): os.remove(file) - if os.path.exists(utils.filelist_path(setup)): - os.rmdir(utils.filelist_path(setup)) + if os.path.exists(utils.filelist_path(config)): + os.rmdir(utils.filelist_path(config)) # remove logs - files = glob.glob(os.path.join(utils.tmp_log_path(setup), "*", "*.log")) + files = glob.glob(os.path.join(utils.tmp_log_path(config), "*", "*.log")) for file in files: if os.path.isfile(file): os.remove(file) - dirs = glob.glob(os.path.join(utils.tmp_log_path(setup), "*")) + dirs = glob.glob(os.path.join(utils.tmp_log_path(config), "*")) for d in dirs: if os.path.isdir(d): os.rmdir(d) - if os.path.exists(utils.tmp_log_path(setup)): - os.rmdir(utils.tmp_log_path(setup)) + if os.path.exists(utils.tmp_log_path(config)): + os.rmdir(utils.tmp_log_path(config)) rule gen_filelist: @@ -171,12 +170,12 @@ rule gen_filelist: input: lambda wildcards: get_filelist( wildcards, - setup, + config, get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - temp(Path(utils.filelist_path(setup)) / "{label}-{tier}.filelist"), + temp(Path(utils.filelist_path(config)) / "{label}-{tier}.filelist"), script: "scripts/write_filelist.py" diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw index 763cb8a..abb1b34 100644 --- a/workflow/Snakefile-build-raw +++ b/workflow/Snakefile-build-raw @@ -9,24 +9,19 @@ to the blinded raw data. It handles: import os, sys from pathlib import Path from legenddataflow import patterns as patt -from legenddataflow import utils, ParsKeyResolve - -check_in_cycle = True +from legenddataflow import utils, execenv, ParsKeyResolve utils.subst_vars_in_snakemake_config(workflow, config) -setup = config["setups"]["l200"] -configs = utils.config_path(setup) -chan_maps = utils.chan_map_path(setup) -swenv = utils.runcmd(setup) -meta = utils.metadata_path(setup) -det_status = utils.det_status_path(setup) -basedir = workflow.basedir +check_in_cycle = True +swenv = execenv.execenv_prefix(config) +# meta = utils.metadata_path(config) +det_status = utils.det_status_path(config) # wait for new pylegendmeta release # if not Path(meta).exists(): # meta = LegendMetadata() -# meta.checkout(config["setups"]["l200"]["legend_metadata_version"]) +# meta.checkout(config["configs"]["l200"]["legend_metadata_version"]) wildcard_constraints: @@ -56,7 +51,7 @@ onstart: shell('{swenv} python3 -B -c "import daq2lh5 "') - raw_par_cat_file = Path(utils.pars_path(setup)) / "raw" / "validity.yaml" + raw_par_cat_file = Path(utils.pars_path(config)) / "raw" / "validity.yaml" if raw_par_cat_file.is_file(): raw_par_cat_file.unlink() try: @@ -69,20 +64,20 @@ onstart: onsuccess: print("Workflow finished, no error") shell("rm *.gen || true") - shell(f"rm {utils.filelist_path(setup)}/* || true") + shell(f"rm {utils.filelist_path(config)}/* || true") rule gen_filelist: input: lambda wildcards: get_filelist( wildcards, - setup, + config, get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - temp(Path(utils.filelist_path(setup)) / "{label}-{tier}.filelist"), + temp(Path(utils.filelist_path(config)) / "{label}-{tier}.filelist"), script: "scripts/write_filelist.py" @@ -93,9 +88,9 @@ rule sort_data: to the sorted dirs under generated """ input: - patt.get_pattern_tier_daq_unsorted(setup, extension="fcio"), + patt.get_pattern_tier_daq_unsorted(config, 
extension="fcio"), output: - patt.get_pattern_tier_daq(setup, extension="fcio"), + patt.get_pattern_tier_daq(config, extension="fcio"), shell: "mv {input} {output}" diff --git a/workflow/src/legenddataflow/__init__.py b/workflow/src/legenddataflow/__init__.py index f812d11..223914e 100644 --- a/workflow/src/legenddataflow/__init__.py +++ b/workflow/src/legenddataflow/__init__.py @@ -1,9 +1,13 @@ from .cal_grouping import CalGrouping from .create_pars_keylist import ParsKeyResolve +from .execenv import ( + execenv_prefix, + execenv_python, + execenv_smk_py_script, +) from .FileKey import ChannelProcKey, FileKey, ProcessingFileKey from .pars_loading import ParsCatalog from .utils import ( - runcmd, subst_vars, subst_vars_impl, subst_vars_in_snakemake_config, @@ -17,7 +21,9 @@ "ParsCatalog", "ParsKeyResolve", "ProcessingFileKey", - "runcmd", + "execenv_prefix", + "execenv_python", + "execenv_smk_py_script", "subst_vars", "subst_vars", "subst_vars_impl", diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index fa91400..b119dd1 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -1,16 +1,70 @@ -# ruff: noqa: T201 from __future__ import annotations import argparse +import logging import os +import shlex import shutil import string import subprocess from pathlib import Path import dbetto +from dbetto import AttrsDict from packaging.requirements import Requirement +from . import utils + +log = logging.getLogger(__name__) + + +def execenv_prefix(config, aslist=False): + """Returns the software environment command prefix. + + For example: `apptainer run image.sif` + """ + config = AttrsDict(config) + + cmdline = shlex.split(config.execenv.cmd) + if "env" in config.execenv: + cmdline += [f"--env={var}={val}" for var, val in config.execenv.env.items()] + + cmdline += shlex.split(config.execenv.arg) + + if aslist: + return cmdline + return " ".join(cmdline) + + +def execenv_python(config, aslist=False): + """Returns the Python interpreter command. + + For example: `apptainer run image.sif python` + """ + config = AttrsDict(config) + + cmdline = execenv_prefix(config, aslist=True) + cmdline.append(f"{config.paths.install}/bin/python") + + if aslist: + return cmdline + return " ".join(cmdline) + + +def execenv_smk_py_script(workflow, config, scriptname, aslist=False): + """Returns the command used to run a Python script for a Snakemake rule. + + For example: `apptainer run image.sif python path/to/script.py` + """ + config = AttrsDict(config) + + cmdline = execenv_python(config, aslist=True) + cmdline.append(f"{workflow.basedir}/scripts/{scriptname}") + + if aslist: + return cmdline + return " ".join(cmdline) + def dataprod() -> None: """dataprod's command-line interface for installing and loading the software in the data production environment. @@ -57,38 +111,76 @@ def install(args) -> None: format: ```yaml - setups: - l200: - pkg_versions: - - python_package_spec + pkg_versions: + - python_package_spec + - ... 
``` """ - print(args.config_file) - if not Path(args.config_file).is_file(): - msg = "config file is not a regular file" - raise RuntimeError(msg) + config_dict = AttrsDict(dbetto.utils.load_dict(args.config_file)) + config_loc = Path(args.config_file).resolve().parent + path_install = config_dict.paths.install - config_file_dir = Path(args.config_file).resolve().parent - config_dic = dbetto.AttrsDict(dbetto.utils.load_dict(args.config_file)) + if args.r and Path(path_install).exists(): + shutil.rmtree(path_install) - exec_cmd = config_dic.setups.l200.execenv.cmd - exec_arg = config_dic.setups.l200.execenv.arg - path_src = config_dic.setups.l200.paths.src - path_install = config_dic.setups.l200.paths.install - path_cache = config_dic.setups.l200.paths.cache + utils.subst_vars( + config_dict, + var_values={"_": config_loc}, + use_env=True, + ignore_missing=False, + ) - exec_cmd = string.Template(exec_cmd).substitute({"_": config_file_dir}) - exec_arg = string.Template(exec_arg).substitute({"_": config_file_dir}) - path_src = Path(string.Template(path_src).substitute({"_": config_file_dir})) - path_install = Path(string.Template(path_install).substitute({"_": config_file_dir})) - path_cache = Path(string.Template(path_cache).substitute({"_": config_file_dir})) + cmd_env = {} - if args.r: - shutil.rmtree(path_install) - shutil.rmtree(path_cache) + def _runcmd(cmd_env, cmd_expr): + msg = ( + "running:" + + " ".join([f"{k}={v}" for k, v in cmd_env.items()]) + + " " + + " ".join(cmd_expr), + ) + log.debug(msg) + + subprocess.run(cmd_expr, env=cmd_env, check=True) + # configure venv + cmd_expr = [*execenv_prefix(config_dict, aslist=True), "python3", "-m", "venv", path_install] + + log.info(f"configuring virtual environment in {path_install}") + _runcmd(cmd_env, cmd_expr) + + cmd_expr = [ + *execenv_python(config_dict, aslist=True), + "-m", + "pip", + "--no-cache-dir", + "install", + "--upgrade", + "pip", + ] + + log.info("upgrading pip") + _runcmd(cmd_env, cmd_expr) + + # install uv + cmd_expr = [ + *execenv_python(config_dict, aslist=True), + "-m", + "pip", + "--no-cache-dir", + "install", + "--no-warn-script-location", + "uv", + ] + + log.info("installing uv") + _runcmd(cmd_env, cmd_expr) + + # now packages + + path_src = Path(config_dict.paths.src) pkg_list = [] - for spec in config_dic.setups.l200.pkg_versions: + for spec in config_dict.pkg_versions: pkg = Requirement(spec).name if (path_src / pkg).exists(): pkg_list.append(str(path_src / pkg)) @@ -96,50 +188,47 @@ def install(args) -> None: pkg_list.append(spec) cmd_base = [ - *(exec_cmd.split()), - exec_arg, - "python3", - "-B", + *execenv_python(config_dict, aslist=True), "-m", + "uv", "pip", + "--no-cache", "install", - "--no-warn-script-location", ] cmd_expr = cmd_base + pkg_list - cmdenv = { - "PYTHONUSERBASE": path_install, - "PIP_CACHE_DIR": path_cache, - } - - print( - "INFO: running:", - " ".join([f"{k}={v}" for k, v in cmdenv.items()]) + " " + " ".join(cmd_expr), - ) - subprocess.run( - cmd_expr, - env=cmdenv, - check=True, - ) + log.info("installing packages") + _runcmd(cmd_env, cmd_expr) + + # and finally legenddataflow + + cmd_expr = [ + *execenv_python(config_dict, aslist=True), + "-m", + "uv", + "pip", + "--no-cache", + "install", + # "--editable", # TODO do we really want this? + str(config_loc), + ] + + log.info("installing packages") + _runcmd(cmd_env, cmd_expr) def cmdexec(args) -> None: """ This function loads the data production environment and executes a given command. 
""" - - if not Path(args.config_file).is_file(): - msg = "config file is not a regular file" - raise RuntimeError(msg) - config_file_dir = Path(args.config_file).resolve().parent - config_dic = dbetto.AttrsDict(dbetto.utils.load_dict(args.config_file)) + config_dict = AttrsDict(dbetto.utils.load_dict(args.config_file)) - exec_cmd = config_dic.setups.l200.execenv.cmd - exec_arg = config_dic.setups.l200.execenv.arg - env_vars = config_dic.setups.l200.execenv.env - path_install = config_dic.setups.l200.paths.install + exec_cmd = config_dict.execenv.cmd + exec_arg = config_dict.execenv.arg + env_vars = config_dict.execenv.env + path_install = config_dict.paths.install exec_cmd = string.Template(exec_cmd).substitute({"_": config_file_dir}) exec_arg = string.Template(exec_arg).substitute({"_": config_file_dir}) diff --git a/workflow/src/legenddataflow/utils.py b/workflow/src/legenddataflow/utils.py index 0b45a81..6bcbb01 100644 --- a/workflow/src/legenddataflow/utils.py +++ b/workflow/src/legenddataflow/utils.py @@ -8,7 +8,6 @@ import copy import os import re -import shlex import string from datetime import datetime from pathlib import Path @@ -108,20 +107,6 @@ def filelist_path(setup): return setup["paths"]["tmp_filelists"] -def runcmd(setup, aslist=False): - cmdline = shlex.split(setup["execenv"]["cmd"]) - cmdline += ["--env=" + "PYTHONUSERBASE=" + f"{setup['paths']['install']}"] - if "env" in setup["execenv"]: - cmdline += [f"--env={var}={val}" for var, val in setup["execenv"]["env"].items()] - - cmdline += shlex.split(setup["execenv"]["arg"]) - - if aslist: - return cmdline - - return " ".join(cmdline) - - def subst_vars_impl(x, var_values, ignore_missing=False): if isinstance(x, str): if "$" in x: From c27b1b1c51d42922730a82f3c4808b5efce44df6 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 13:49:32 +0100 Subject: [PATCH 066/101] install uv only if not already available --- workflow/src/legenddataflow/execenv.py | 84 ++++++++++++++++---------- 1 file changed, 53 insertions(+), 31 deletions(-) diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index b119dd1..c49d04f 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -118,10 +118,6 @@ def install(args) -> None: """ config_dict = AttrsDict(dbetto.utils.load_dict(args.config_file)) config_loc = Path(args.config_file).resolve().parent - path_install = config_dict.paths.install - - if args.r and Path(path_install).exists(): - shutil.rmtree(path_install) utils.subst_vars( config_dict, @@ -130,6 +126,11 @@ def install(args) -> None: ignore_missing=False, ) + path_install = config_dict.paths.install + + if args.r and Path(path_install).exists(): + shutil.rmtree(path_install) + cmd_env = {} def _runcmd(cmd_env, cmd_expr): @@ -143,38 +144,59 @@ def _runcmd(cmd_env, cmd_expr): subprocess.run(cmd_expr, env=cmd_env, check=True) + has_uv = False + try: + subprocess.run( + [*execenv_prefix(config_dict, aslist=True), "uv", "--version"], + capture_output=True, + check=True, + ) + has_uv = True + except (subprocess.CalledProcessError, FileNotFoundError): + pass + # configure venv - cmd_expr = [*execenv_prefix(config_dict, aslist=True), "python3", "-m", "venv", path_install] + if has_uv: + cmd_expr = [*execenv_prefix(config_dict, aslist=True), "uv", "venv", path_install] + else: + cmd_expr = [ + *execenv_prefix(config_dict, aslist=True), + "python3", + "-m", + "venv", + path_install, + ] log.info(f"configuring virtual environment in {path_install}") 
_runcmd(cmd_env, cmd_expr) - cmd_expr = [ - *execenv_python(config_dict, aslist=True), - "-m", - "pip", - "--no-cache-dir", - "install", - "--upgrade", - "pip", - ] - - log.info("upgrading pip") - _runcmd(cmd_env, cmd_expr) - - # install uv - cmd_expr = [ - *execenv_python(config_dict, aslist=True), - "-m", - "pip", - "--no-cache-dir", - "install", - "--no-warn-script-location", - "uv", - ] - - log.info("installing uv") - _runcmd(cmd_env, cmd_expr) + if not has_uv: + cmd_expr = [ + *execenv_python(config_dict, aslist=True), + "-m", + "pip", + "--no-cache-dir", + "install", + "--upgrade", + "pip", + ] + + log.info("upgrading pip") + _runcmd(cmd_env, cmd_expr) + + # install uv + cmd_expr = [ + *execenv_python(config_dict, aslist=True), + "-m", + "pip", + "--no-cache-dir", + "install", + "--no-warn-script-location", + "uv", + ] + + log.info("installing uv") + _runcmd(cmd_env, cmd_expr) # now packages From 6ec902d3023a2f5f1f7fbf56623ecfe4b966f244 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 14:07:39 +0100 Subject: [PATCH 067/101] improve config-lngs.yaml --- config-lngs.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/config-lngs.yaml b/config-lngs.yaml index faa456b..356aa4e 100644 --- a/config-lngs.yaml +++ b/config-lngs.yaml @@ -44,8 +44,7 @@ paths: tmp_par: $_/generated/tmp/par src: $_/software/python/src - install: $_/software/python/install - cache: $_/software/python/cache + install: $_/.snakemake/legend-dataflow/venv table_format: raw: ch{ch:07d}/raw @@ -59,7 +58,7 @@ table_format: tcm: hardware_tcm_1 execenv: - cmd: apptainer run + cmd: apptainer exec arg: /data2/public/prodenv/containers/legendexp_legend-base_latest_20241110203225.sif env: PRODENV: $PRODENV From cf093d8e743337eac71caf4a63a8ef8eb4a3fb79 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 14:43:29 +0100 Subject: [PATCH 068/101] also fix dataprod exec --- pyproject.toml | 1 + workflow/src/legenddataflow/execenv.py | 150 ++++++++++++------------- 2 files changed, 74 insertions(+), 77 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 944cfe5..06120d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ dynamic = ["version"] # these are the dependencies strictly required by legend-dataflow dependencies = [ + "colorlog", "dbetto>=1.0.5", "pylegendmeta==1.2.0a2", ] diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index c49d04f..2bf433c 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -5,10 +5,10 @@ import os import shlex import shutil -import string import subprocess from pathlib import Path +import colorlog import dbetto from dbetto import AttrsDict from packaging.requirements import Requirement @@ -27,13 +27,19 @@ def execenv_prefix(config, aslist=False): cmdline = shlex.split(config.execenv.cmd) if "env" in config.execenv: + # FIXME: this is not portable, only works with Apptainer and Docker cmdline += [f"--env={var}={val}" for var, val in config.execenv.env.items()] + cmdenv = {} + xdg_runtime_dir = os.getenv("XDG_RUNTIME_DIR") + if xdg_runtime_dir: + cmdenv["APPTAINER_BINDPATH"] = xdg_runtime_dir + cmdline += shlex.split(config.execenv.arg) if aslist: - return cmdline - return " ".join(cmdline) + return cmdline, cmdenv + return " ".join(cmdline), cmdenv def execenv_python(config, aslist=False): @@ -43,12 +49,12 @@ def execenv_python(config, aslist=False): """ config = AttrsDict(config) - cmdline = execenv_prefix(config, aslist=True) 
+ cmdline, cmdenv = execenv_prefix(config, aslist=True) cmdline.append(f"{config.paths.install}/bin/python") if aslist: - return cmdline - return " ".join(cmdline) + return cmdline, cmdenv + return " ".join(cmdline), cmdenv def execenv_smk_py_script(workflow, config, scriptname, aslist=False): @@ -58,12 +64,12 @@ def execenv_smk_py_script(workflow, config, scriptname, aslist=False): """ config = AttrsDict(config) - cmdline = execenv_python(config, aslist=True) + cmdline, cmdenv = execenv_python(config, aslist=True) cmdline.append(f"{workflow.basedir}/scripts/{scriptname}") if aslist: - return cmdline - return " ".join(cmdline) + return cmdline, cmdenv + return " ".join(cmdline), cmdenv def dataprod() -> None: @@ -79,15 +85,18 @@ def dataprod() -> None: prog="dataprod", description="dataprod's command-line interface" ) + parser.add_argument("-v", "--verbose", help="increase verbosity", action="store_true") + subparsers = parser.add_subparsers() parser_install = subparsers.add_parser( "install", help="install user software in data production environment" ) + parser_install.add_argument("config_file", help="production cycle configuration file") parser_install.add_argument( - "config_file", help="production cycle configuration file", type=str - ) - parser_install.add_argument( - "-r", help="remove software directory before installing software", action="store_true" + "-r", + "--remove", + help="remove software directory before installing software", + action="store_true", ) parser_install.set_defaults(func=install) @@ -101,6 +110,17 @@ def dataprod() -> None: parser_exec.set_defaults(func=cmdexec) args = parser.parse_args() + + if args.verbose: + handler = colorlog.StreamHandler() + handler.setFormatter( + colorlog.ColoredFormatter("%(log_color)s%(name)s [%(levelname)s] %(message)s") + ) + + logger = logging.getLogger("legenddataflow") + logger.setLevel(logging.DEBUG) + logger.addHandler(handler) + args.func(args) @@ -128,28 +148,28 @@ def install(args) -> None: path_install = config_dict.paths.install - if args.r and Path(path_install).exists(): + if args.remove and Path(path_install).exists(): shutil.rmtree(path_install) - cmd_env = {} - - def _runcmd(cmd_env, cmd_expr): + def _runcmd(cmd_expr, cmd_env, **kwargs): msg = ( - "running:" + "running: " + " ".join([f"{k}={v}" for k, v in cmd_env.items()]) + " " + " ".join(cmd_expr), ) log.debug(msg) - subprocess.run(cmd_expr, env=cmd_env, check=True) + subprocess.run(cmd_expr, env=cmd_env, check=True, **kwargs) + + cmd_prefix, cmd_env = execenv_prefix(config_dict, aslist=True) has_uv = False try: - subprocess.run( - [*execenv_prefix(config_dict, aslist=True), "uv", "--version"], + _runcmd( + [*cmd_prefix, "uv", "--version"], + cmd_env, capture_output=True, - check=True, ) has_uv = True except (subprocess.CalledProcessError, FileNotFoundError): @@ -157,22 +177,18 @@ def _runcmd(cmd_env, cmd_expr): # configure venv if has_uv: - cmd_expr = [*execenv_prefix(config_dict, aslist=True), "uv", "venv", path_install] + cmd_expr = [*cmd_prefix, "uv", "venv", path_install] else: - cmd_expr = [ - *execenv_prefix(config_dict, aslist=True), - "python3", - "-m", - "venv", - path_install, - ] + cmd_expr = [*cmd_prefix, "python3", "-m", "venv", path_install] log.info(f"configuring virtual environment in {path_install}") - _runcmd(cmd_env, cmd_expr) + _runcmd(cmd_expr, cmd_env) + + python, cmd_env = execenv_python(config_dict, aslist=True) if not has_uv: cmd_expr = [ - *execenv_python(config_dict, aslist=True), + *python, "-m", "pip", "--no-cache-dir", @@ -182,11 
+198,11 @@ def _runcmd(cmd_env, cmd_expr): ] log.info("upgrading pip") - _runcmd(cmd_env, cmd_expr) + _runcmd(cmd_expr, cmd_env) # install uv cmd_expr = [ - *execenv_python(config_dict, aslist=True), + *python, "-m", "pip", "--no-cache-dir", @@ -196,7 +212,7 @@ def _runcmd(cmd_env, cmd_expr): ] log.info("installing uv") - _runcmd(cmd_env, cmd_expr) + _runcmd(cmd_expr, cmd_env) # now packages @@ -209,24 +225,17 @@ def _runcmd(cmd_env, cmd_expr): else: pkg_list.append(spec) - cmd_base = [ - *execenv_python(config_dict, aslist=True), - "-m", - "uv", - "pip", - "--no-cache", - "install", - ] + cmd_base = [*python, "-m", "uv", "pip", "--no-cache", "install"] cmd_expr = cmd_base + pkg_list log.info("installing packages") - _runcmd(cmd_env, cmd_expr) + _runcmd(cmd_expr, cmd_env) # and finally legenddataflow cmd_expr = [ - *execenv_python(config_dict, aslist=True), + *python, "-m", "uv", "pip", @@ -237,45 +246,32 @@ def _runcmd(cmd_env, cmd_expr): ] log.info("installing packages") - _runcmd(cmd_env, cmd_expr) + _runcmd(cmd_expr, cmd_env) def cmdexec(args) -> None: """ This function loads the data production environment and executes a given command. """ - config_file_dir = Path(args.config_file).resolve().parent config_dict = AttrsDict(dbetto.utils.load_dict(args.config_file)) + config_loc = Path(args.config_file).resolve().parent - exec_cmd = config_dict.execenv.cmd - exec_arg = config_dict.execenv.arg - env_vars = config_dict.execenv.env - path_install = config_dict.paths.install + utils.subst_vars( + config_dict, + var_values={"_": config_loc}, + use_env=True, + ignore_missing=False, + ) - exec_cmd = string.Template(exec_cmd).substitute({"_": config_file_dir}) - exec_arg = string.Template(exec_arg).substitute({"_": config_file_dir}) - path_install = string.Template(path_install).substitute({"_": config_file_dir}) + cmd_prefix, cmd_env = execenv_prefix(config_dict, aslist=True) + cmd_expr = [*cmd_prefix, *args.command] - xdg_runtime_dir = os.getenv("XDG_RUNTIME_DIR") - if xdg_runtime_dir: - subprocess.run( - [*(exec_cmd.split()), exec_arg, *args.command], - env=dict( - PYTHONUSERBASE=path_install, - APPTAINERENV_APPEND_PATH=f":{path_install}/bin", - APPTAINER_BINDPATH=xdg_runtime_dir, - **env_vars, - ), - check=True, - ) - else: - subprocess.run( - [*(exec_cmd.split()), exec_arg, *args.command], - env=dict( - PYTHONUSERBASE=path_install, - APPTAINERENV_APPEND_PATH=f":{path_install}/bin", - APPTAINER_BINDPATH=xdg_runtime_dir, - **env_vars, - ), - check=True, - ) + msg = ( + "running: " + + " ".join([f"{k}={v}" for k, v in cmd_env.items()]) + + " " + + " ".join(cmd_expr), + ) + log.debug(msg) + + subprocess.run(cmd_expr, env=cmd_env, check=True) From a3d34d6ac9de58bbf6dc3b9d5ecf30d11e46e821 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 14:59:13 +0100 Subject: [PATCH 069/101] config.execenv can be left unspecified --- workflow/src/legenddataflow/execenv.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index 2bf433c..ad54d11 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -25,17 +25,21 @@ def execenv_prefix(config, aslist=False): """ config = AttrsDict(config) - cmdline = shlex.split(config.execenv.cmd) - if "env" in config.execenv: - # FIXME: this is not portable, only works with Apptainer and Docker - cmdline += [f"--env={var}={val}" for var, val in config.execenv.env.items()] - - cmdenv = {} - 
xdg_runtime_dir = os.getenv("XDG_RUNTIME_DIR") - if xdg_runtime_dir: - cmdenv["APPTAINER_BINDPATH"] = xdg_runtime_dir - - cmdline += shlex.split(config.execenv.arg) + if "execenv" in config and "cmd" in config.execenv and "arg" in config.execenv: + cmdline = shlex.split(config.execenv.cmd) + if "env" in config.execenv: + # FIXME: this is not portable, only works with Apptainer and Docker + cmdline += [f"--env={var}={val}" for var, val in config.execenv.env.items()] + + cmdenv = {} + xdg_runtime_dir = os.getenv("XDG_RUNTIME_DIR") + if xdg_runtime_dir: + cmdenv["APPTAINER_BINDPATH"] = xdg_runtime_dir + + cmdline += shlex.split(config.execenv.arg) + else: + cmdenv = {} + cmdline = [] if aslist: return cmdline, cmdenv From a0d68420413c9bf77a969670188455e647f1f348 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 15:05:10 +0100 Subject: [PATCH 070/101] move scripts in legenddataflow package and make imports relative --- pyproject.toml | 2 +- workflow/Snakefile-build-raw | 8 +++----- workflow/src/legenddataflow/__init__.py | 4 ---- workflow/{ => src/legenddataflow}/scripts/__init__.py | 0 .../legenddataflow}/scripts/blinding_calibration.py | 0 workflow/{ => src/legenddataflow}/scripts/build_dsp.py | 3 ++- workflow/{ => src/legenddataflow}/scripts/build_evt.py | 3 ++- workflow/{ => src/legenddataflow}/scripts/build_fdb.py | 0 workflow/{ => src/legenddataflow}/scripts/build_hit.py | 3 ++- .../{ => src/legenddataflow}/scripts/build_raw_blind.py | 3 ++- .../{ => src/legenddataflow}/scripts/build_raw_fcio.py | 3 ++- .../{ => src/legenddataflow}/scripts/build_raw_orca.py | 3 ++- workflow/{ => src/legenddataflow}/scripts/build_skm.py | 3 ++- workflow/{ => src/legenddataflow}/scripts/build_tcm.py | 3 ++- .../{ => src/legenddataflow}/scripts/check_blinding.py | 3 ++- workflow/{ => src/legenddataflow}/scripts/complete_run.py | 6 +++--- .../legenddataflow}/scripts/create_chankeylist.py | 0 .../{ => src/legenddataflow}/scripts/merge_channels.py | 3 ++- workflow/{ => src/legenddataflow}/scripts/par_psp_geds.py | 3 ++- .../legenddataflow}/scripts/pars_dsp_build_svm_geds.py | 3 ++- .../legenddataflow}/scripts/pars_dsp_dplms_geds.py | 3 ++- .../legenddataflow}/scripts/pars_dsp_eopt_geds.py | 3 ++- .../legenddataflow}/scripts/pars_dsp_evtsel_geds.py | 3 ++- .../legenddataflow}/scripts/pars_dsp_nopt_geds.py | 3 ++- .../{ => src/legenddataflow}/scripts/pars_dsp_svm_geds.py | 0 .../{ => src/legenddataflow}/scripts/pars_dsp_tau_geds.py | 3 ++- workflow/{ => src/legenddataflow}/scripts/pars_hit_aoe.py | 5 +++-- .../{ => src/legenddataflow}/scripts/pars_hit_ecal.py | 5 +++-- workflow/{ => src/legenddataflow}/scripts/pars_hit_lq.py | 5 +++-- workflow/{ => src/legenddataflow}/scripts/pars_hit_qc.py | 5 +++-- .../{ => src/legenddataflow}/scripts/pars_pht_aoecal.py | 5 +++-- .../{ => src/legenddataflow}/scripts/pars_pht_fast.py | 5 +++-- .../{ => src/legenddataflow}/scripts/pars_pht_lqcal.py | 5 +++-- .../{ => src/legenddataflow}/scripts/pars_pht_partcal.py | 5 +++-- workflow/{ => src/legenddataflow}/scripts/pars_pht_qc.py | 5 +++-- .../{ => src/legenddataflow}/scripts/pars_pht_qc_phy.py | 5 +++-- .../{ => src/legenddataflow}/scripts/pars_tcm_pulser.py | 3 ++- .../{ => src/legenddataflow}/scripts/write_filelist.py | 0 38 files changed, 73 insertions(+), 51 deletions(-) rename workflow/{ => src/legenddataflow}/scripts/__init__.py (100%) rename workflow/{ => src/legenddataflow}/scripts/blinding_calibration.py (100%) rename workflow/{ => src/legenddataflow}/scripts/build_dsp.py (99%) rename workflow/{ => 
src/legenddataflow}/scripts/build_evt.py (99%) rename workflow/{ => src/legenddataflow}/scripts/build_fdb.py (100%) rename workflow/{ => src/legenddataflow}/scripts/build_hit.py (98%) rename workflow/{ => src/legenddataflow}/scripts/build_raw_blind.py (99%) rename workflow/{ => src/legenddataflow}/scripts/build_raw_fcio.py (98%) rename workflow/{ => src/legenddataflow}/scripts/build_raw_orca.py (98%) rename workflow/{ => src/legenddataflow}/scripts/build_skm.py (98%) rename workflow/{ => src/legenddataflow}/scripts/build_tcm.py (97%) rename workflow/{ => src/legenddataflow}/scripts/check_blinding.py (98%) rename workflow/{ => src/legenddataflow}/scripts/complete_run.py (98%) rename workflow/{ => src/legenddataflow}/scripts/create_chankeylist.py (100%) rename workflow/{ => src/legenddataflow}/scripts/merge_channels.py (99%) rename workflow/{ => src/legenddataflow}/scripts/par_psp_geds.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_dsp_build_svm_geds.py (97%) rename workflow/{ => src/legenddataflow}/scripts/pars_dsp_dplms_geds.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_dsp_eopt_geds.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_dsp_evtsel_geds.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_dsp_nopt_geds.py (98%) rename workflow/{ => src/legenddataflow}/scripts/pars_dsp_svm_geds.py (100%) rename workflow/{ => src/legenddataflow}/scripts/pars_dsp_tau_geds.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_hit_aoe.py (98%) rename workflow/{ => src/legenddataflow}/scripts/pars_hit_ecal.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_hit_lq.py (95%) rename workflow/{ => src/legenddataflow}/scripts/pars_hit_qc.py (98%) rename workflow/{ => src/legenddataflow}/scripts/pars_pht_aoecal.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_pht_fast.py (98%) rename workflow/{ => src/legenddataflow}/scripts/pars_pht_lqcal.py (96%) rename workflow/{ => src/legenddataflow}/scripts/pars_pht_partcal.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_pht_qc.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_pht_qc_phy.py (98%) rename workflow/{ => src/legenddataflow}/scripts/pars_tcm_pulser.py (97%) rename workflow/{ => src/legenddataflow}/scripts/write_filelist.py (100%) diff --git a/pyproject.toml b/pyproject.toml index 06120d2..27e62e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,4 +88,4 @@ docs = [ ] [project.scripts] -dataprod = "legenddataflow.execenv:dataprod" +dataprod = "legenddataflow.library.execenv:dataprod" diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw index abb1b34..fafd20c 100644 --- a/workflow/Snakefile-build-raw +++ b/workflow/Snakefile-build-raw @@ -15,13 +15,11 @@ utils.subst_vars_in_snakemake_config(workflow, config) check_in_cycle = True swenv = execenv.execenv_prefix(config) -# meta = utils.metadata_path(config) +meta_path = utils.metadata_path(config) det_status = utils.det_status_path(config) -# wait for new pylegendmeta release -# if not Path(meta).exists(): -# meta = LegendMetadata() -# meta.checkout(config["configs"]["l200"]["legend_metadata_version"]) +if not Path(meta_path).exists(): + LegendMetadata(meta_path).checkout(config["legend_metadata_version"]) wildcard_constraints: diff --git a/workflow/src/legenddataflow/__init__.py b/workflow/src/legenddataflow/__init__.py index 223914e..ca8ddbb 100644 --- a/workflow/src/legenddataflow/__init__.py +++ b/workflow/src/legenddataflow/__init__.py @@ -9,7 +9,6 @@ from 
.pars_loading import ParsCatalog from .utils import ( subst_vars, - subst_vars_impl, subst_vars_in_snakemake_config, unix_time, ) @@ -25,9 +24,6 @@ "execenv_python", "execenv_smk_py_script", "subst_vars", - "subst_vars", - "subst_vars_impl", "subst_vars_in_snakemake_config", "unix_time", - "unix_time", ] diff --git a/workflow/scripts/__init__.py b/workflow/src/legenddataflow/scripts/__init__.py similarity index 100% rename from workflow/scripts/__init__.py rename to workflow/src/legenddataflow/scripts/__init__.py diff --git a/workflow/scripts/blinding_calibration.py b/workflow/src/legenddataflow/scripts/blinding_calibration.py similarity index 100% rename from workflow/scripts/blinding_calibration.py rename to workflow/src/legenddataflow/scripts/blinding_calibration.py diff --git a/workflow/scripts/build_dsp.py b/workflow/src/legenddataflow/scripts/build_dsp.py similarity index 99% rename from workflow/scripts/build_dsp.py rename to workflow/src/legenddataflow/scripts/build_dsp.py index f6e44df..137eb1f 100644 --- a/workflow/scripts/build_dsp.py +++ b/workflow/src/legenddataflow/scripts/build_dsp.py @@ -8,10 +8,11 @@ from dbetto import TextDB from dbetto.catalog import Props from dspeed import build_dsp -from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo import lh5 +from ..log import build_log + def replace_list_with_array(dic): for key, value in dic.items(): diff --git a/workflow/scripts/build_evt.py b/workflow/src/legenddataflow/scripts/build_evt.py similarity index 99% rename from workflow/scripts/build_evt.py rename to workflow/src/legenddataflow/scripts/build_evt.py index 5eac164..b0bf2a4 100644 --- a/workflow/scripts/build_evt.py +++ b/workflow/src/legenddataflow/scripts/build_evt.py @@ -6,11 +6,12 @@ import lgdo.lh5 as lh5 import numpy as np from dbetto import Props, TextDB -from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo.types import Array from pygama.evt import build_evt +from ..log import build_log + sto = lh5.LH5Store() diff --git a/workflow/scripts/build_fdb.py b/workflow/src/legenddataflow/scripts/build_fdb.py similarity index 100% rename from workflow/scripts/build_fdb.py rename to workflow/src/legenddataflow/scripts/build_fdb.py diff --git a/workflow/scripts/build_hit.py b/workflow/src/legenddataflow/scripts/build_hit.py similarity index 98% rename from workflow/scripts/build_hit.py rename to workflow/src/legenddataflow/scripts/build_hit.py index f096e0c..4f31947 100644 --- a/workflow/scripts/build_hit.py +++ b/workflow/src/legenddataflow/scripts/build_hit.py @@ -3,11 +3,12 @@ from pathlib import Path from dbetto.catalog import Props -from legenddataflow.log import build_log from legendmeta import LegendMetadata, TextDB from lgdo import lh5 from pygama.hit.build_hit import build_hit +from ..log import build_log + argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) argparser.add_argument("--pars_file", help="hit pars file", nargs="*") diff --git a/workflow/scripts/build_raw_blind.py b/workflow/src/legenddataflow/scripts/build_raw_blind.py similarity index 99% rename from workflow/scripts/build_raw_blind.py rename to workflow/src/legenddataflow/scripts/build_raw_blind.py index 5d582d4..ef704dd 100644 --- a/workflow/scripts/build_raw_blind.py +++ b/workflow/src/legenddataflow/scripts/build_raw_blind.py @@ -16,10 +16,11 @@ import numexpr as ne import numpy as np from dbetto.catalog import Props -from legenddataflow.log import build_log from legendmeta 
import LegendMetadata, TextDB from lgdo import lh5 +from ..log import build_log + argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) argparser.add_argument("--output", help="output file", type=str) diff --git a/workflow/scripts/build_raw_fcio.py b/workflow/src/legenddataflow/scripts/build_raw_fcio.py similarity index 98% rename from workflow/scripts/build_raw_fcio.py rename to workflow/src/legenddataflow/scripts/build_raw_fcio.py index c3b577e..176565a 100644 --- a/workflow/scripts/build_raw_fcio.py +++ b/workflow/src/legenddataflow/scripts/build_raw_fcio.py @@ -6,7 +6,8 @@ from daq2lh5 import build_raw from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log + +from ..log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/workflow/scripts/build_raw_orca.py b/workflow/src/legenddataflow/scripts/build_raw_orca.py similarity index 98% rename from workflow/scripts/build_raw_orca.py rename to workflow/src/legenddataflow/scripts/build_raw_orca.py index c098806..899c742 100644 --- a/workflow/scripts/build_raw_orca.py +++ b/workflow/src/legenddataflow/scripts/build_raw_orca.py @@ -6,7 +6,8 @@ from daq2lh5 import build_raw from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log + +from ..log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/workflow/scripts/build_skm.py b/workflow/src/legenddataflow/scripts/build_skm.py similarity index 98% rename from workflow/scripts/build_skm.py rename to workflow/src/legenddataflow/scripts/build_skm.py index be2cfb3..0463c61 100644 --- a/workflow/scripts/build_skm.py +++ b/workflow/src/legenddataflow/scripts/build_skm.py @@ -3,10 +3,11 @@ import awkward as ak from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from lgdo import lh5 from lgdo.types import Array, Struct, Table, VectorOfVectors +from ..log import build_log + def get_all_out_fields(input_table, out_fields, current_field=""): for key in input_table: diff --git a/workflow/scripts/build_tcm.py b/workflow/src/legenddataflow/scripts/build_tcm.py similarity index 97% rename from workflow/scripts/build_tcm.py rename to workflow/src/legenddataflow/scripts/build_tcm.py index 402c567..7e6ab73 100644 --- a/workflow/scripts/build_tcm.py +++ b/workflow/src/legenddataflow/scripts/build_tcm.py @@ -6,9 +6,10 @@ from daq2lh5.orca import orca_flashcam from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from pygama.evt.build_tcm import build_tcm +from ..log import build_log + argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) argparser.add_argument("output", help="output file", type=str) diff --git a/workflow/scripts/check_blinding.py b/workflow/src/legenddataflow/scripts/check_blinding.py similarity index 98% rename from workflow/scripts/check_blinding.py rename to workflow/src/legenddataflow/scripts/check_blinding.py index 2a47172..37bf4e9 100644 --- a/workflow/scripts/check_blinding.py +++ b/workflow/src/legenddataflow/scripts/check_blinding.py @@ -16,12 +16,13 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo import lh5 from pygama.math.histogram import 
get_hist from pygama.pargen.energy_cal import get_i_local_maxima +from ..log import build_log + mpl.use("Agg") argparser = argparse.ArgumentParser() diff --git a/workflow/scripts/complete_run.py b/workflow/src/legenddataflow/scripts/complete_run.py similarity index 98% rename from workflow/scripts/complete_run.py rename to workflow/src/legenddataflow/scripts/complete_run.py index 4d5cad7..1223c5c 100644 --- a/workflow/scripts/complete_run.py +++ b/workflow/src/legenddataflow/scripts/complete_run.py @@ -7,9 +7,9 @@ import time from pathlib import Path -import legenddataflow.utils as ut -from legenddataflow import patterns -from legenddataflow.FileKey import FileKey +from ..FileKey import FileKey +from . import patterns +from . import utils as ut print("INFO: dataflow ran successfully, now few final checks and scripts") diff --git a/workflow/scripts/create_chankeylist.py b/workflow/src/legenddataflow/scripts/create_chankeylist.py similarity index 100% rename from workflow/scripts/create_chankeylist.py rename to workflow/src/legenddataflow/scripts/create_chankeylist.py diff --git a/workflow/scripts/merge_channels.py b/workflow/src/legenddataflow/scripts/merge_channels.py similarity index 99% rename from workflow/scripts/merge_channels.py rename to workflow/src/legenddataflow/scripts/merge_channels.py index 1ca2026..d0f90f7 100644 --- a/workflow/scripts/merge_channels.py +++ b/workflow/src/legenddataflow/scripts/merge_channels.py @@ -5,10 +5,11 @@ import numpy as np from dbetto.catalog import Props -from legenddataflow.FileKey import ChannelProcKey from legendmeta import LegendMetadata from lgdo import lh5 +from ..FileKey import ChannelProcKey + def replace_path(d, old_path, new_path): if isinstance(d, dict): diff --git a/workflow/scripts/par_psp_geds.py b/workflow/src/legenddataflow/scripts/par_psp_geds.py similarity index 99% rename from workflow/scripts/par_psp_geds.py rename to workflow/src/legenddataflow/scripts/par_psp_geds.py index c74ffa3..a765e64 100644 --- a/workflow/scripts/par_psp_geds.py +++ b/workflow/src/legenddataflow/scripts/par_psp_geds.py @@ -8,9 +8,10 @@ import matplotlib.pyplot as plt import numpy as np from dbetto.catalog import Props -from legenddataflow.FileKey import ChannelProcKey from legendmeta import LegendMetadata +from ..FileKey import ChannelProcKey + mpl.use("Agg") diff --git a/workflow/scripts/pars_dsp_build_svm_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py similarity index 97% rename from workflow/scripts/pars_dsp_build_svm_geds.py rename to workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py index 3b7b7ea..f6c0878 100644 --- a/workflow/scripts/pars_dsp_build_svm_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py @@ -4,10 +4,11 @@ from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from lgdo import lh5 from sklearn.svm import SVC +from ..log import build_log + argparser = argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) argparser.add_argument("--configs", help="config file", type=str) diff --git a/workflow/scripts/pars_dsp_dplms_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py similarity index 99% rename from workflow/scripts/pars_dsp_dplms_geds.py rename to workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py index 5d33fb8..f7f878e 100644 --- a/workflow/scripts/pars_dsp_dplms_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py @@ -8,11 +8,12 @@ import numpy as np 
from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo import Array, Table from pygama.pargen.dplms_ge_dict import dplms_ge_dict +from ..log import build_log + argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) diff --git a/workflow/scripts/pars_dsp_eopt_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py similarity index 99% rename from workflow/scripts/pars_dsp_eopt_geds.py rename to workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py index e59ee54..1a6f2d1 100644 --- a/workflow/scripts/pars_dsp_eopt_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py @@ -11,7 +11,6 @@ from dbetto import TextDB from dbetto.catalog import Props from dspeed.units import unit_registry as ureg -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.math.distributions import hpge_peak from pygama.pargen.dsp_optimize import ( @@ -20,6 +19,8 @@ run_one_dsp, ) +from ..log import build_log + warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/workflow/scripts/pars_dsp_evtsel_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py similarity index 99% rename from workflow/scripts/pars_dsp_evtsel_geds.py rename to workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py index dc76878..1398256 100644 --- a/workflow/scripts/pars_dsp_evtsel_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py @@ -12,11 +12,12 @@ import pygama.pargen.energy_cal as pgc from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp +from ..log import build_log + warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/scripts/pars_dsp_nopt_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py similarity index 98% rename from workflow/scripts/pars_dsp_nopt_geds.py rename to workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py index ae3aacb..9c5d5ff 100644 --- a/workflow/scripts/pars_dsp_nopt_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py @@ -8,11 +8,12 @@ import pygama.pargen.noise_optimization as pno from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp +from ..log import build_log + sto = lh5.LH5Store() argparser = argparse.ArgumentParser() diff --git a/workflow/scripts/pars_dsp_svm_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_svm_geds.py similarity index 100% rename from workflow/scripts/pars_dsp_svm_geds.py rename to workflow/src/legenddataflow/scripts/pars_dsp_svm_geds.py diff --git a/workflow/scripts/pars_dsp_tau_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py similarity index 99% rename from workflow/scripts/pars_dsp_tau_geds.py rename to workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py index 1149c69..552dd3e 100644 --- a/workflow/scripts/pars_dsp_tau_geds.py +++ 
b/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py @@ -6,12 +6,13 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.extract_tau import ExtractTau +from ..log import build_log + argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) argparser.add_argument("--metadata", help="metadata", type=str, required=True) diff --git a/workflow/scripts/pars_hit_aoe.py b/workflow/src/legenddataflow/scripts/pars_hit_aoe.py similarity index 98% rename from workflow/scripts/pars_hit_aoe.py rename to workflow/src/legenddataflow/scripts/pars_hit_aoe.py index d7fa221..40ea3c3 100644 --- a/workflow/scripts/pars_hit_aoe.py +++ b/workflow/src/legenddataflow/scripts/pars_hit_aoe.py @@ -10,14 +10,15 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.convert_np import convert_dict_np_to_float -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data +from ..convert_np import convert_dict_np_to_float +from ..log import build_log + warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/scripts/pars_hit_ecal.py b/workflow/src/legenddataflow/scripts/pars_hit_ecal.py similarity index 99% rename from workflow/scripts/pars_hit_ecal.py rename to workflow/src/legenddataflow/scripts/pars_hit_ecal.py index 8bf4f1f..9a2f3c5 100644 --- a/workflow/scripts/pars_hit_ecal.py +++ b/workflow/src/legenddataflow/scripts/pars_hit_ecal.py @@ -16,8 +16,6 @@ import pygama.math.histogram as pgh from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.convert_np import convert_dict_np_to_float -from legenddataflow.log import build_log from legendmeta import LegendMetadata from matplotlib.colors import LogNorm from pygama.math.distributions import nb_poly @@ -26,6 +24,9 @@ from pygama.pargen.utils import load_data from scipy.stats import binned_statistic +from ..convert_np import convert_dict_np_to_float +from ..log import build_log + log = logging.getLogger(__name__) mpl.use("agg") sto = lh5.LH5Store() diff --git a/workflow/scripts/pars_hit_lq.py b/workflow/src/legenddataflow/scripts/pars_hit_lq.py similarity index 95% rename from workflow/scripts/pars_hit_lq.py rename to workflow/src/legenddataflow/scripts/pars_hit_lq.py index c5f04cb..a7a2601 100644 --- a/workflow/scripts/pars_hit_lq.py +++ b/workflow/src/legenddataflow/scripts/pars_hit_lq.py @@ -9,8 +9,6 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.convert_np import convert_dict_np_to_float -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 @@ -19,6 +17,9 @@ from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data +from ..convert_np import convert_dict_np_to_float +from ..log import build_log + warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/scripts/pars_hit_qc.py 
b/workflow/src/legenddataflow/scripts/pars_hit_qc.py similarity index 98% rename from workflow/scripts/pars_hit_qc.py rename to workflow/src/legenddataflow/scripts/pars_hit_qc.py index c9d380f..c83dff7 100644 --- a/workflow/scripts/pars_hit_qc.py +++ b/workflow/src/legenddataflow/scripts/pars_hit_qc.py @@ -11,8 +11,6 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.convert_np import convert_dict_np_to_float -from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -22,6 +20,9 @@ ) from pygama.pargen.utils import load_data +from ..convert_np import convert_dict_np_to_float +from ..log import build_log + log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/scripts/pars_pht_aoecal.py b/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py similarity index 99% rename from workflow/scripts/pars_pht_aoecal.py rename to workflow/src/legenddataflow/scripts/pars_pht_aoecal.py index bbcf791..4ad0980 100644 --- a/workflow/scripts/pars_pht_aoecal.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py @@ -13,14 +13,15 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.FileKey import ChannelProcKey, ProcessingFileKey -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data +from ..FileKey import ChannelProcKey, ProcessingFileKey +from ..log import build_log + log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/scripts/pars_pht_fast.py b/workflow/src/legenddataflow/scripts/pars_pht_fast.py similarity index 98% rename from workflow/scripts/pars_pht_fast.py rename to workflow/src/legenddataflow/scripts/pars_pht_fast.py index 1dfd1d6..a807fa6 100644 --- a/workflow/scripts/pars_pht_fast.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_fast.py @@ -10,8 +10,6 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.FileKey import ChannelProcKey, ProcessingFileKey -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pars_pht_aoecal import run_aoe_calibration from pars_pht_lqcal import run_lq_calibration @@ -19,6 +17,9 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data +from ..FileKey import ChannelProcKey, ProcessingFileKey +from ..log import build_log + warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/workflow/scripts/pars_pht_lqcal.py b/workflow/src/legenddataflow/scripts/pars_pht_lqcal.py similarity index 96% rename from workflow/scripts/pars_pht_lqcal.py rename to workflow/src/legenddataflow/scripts/pars_pht_lqcal.py index 8826efd..a6a231a 100644 --- a/workflow/scripts/pars_pht_lqcal.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_lqcal.py @@ -11,8 +11,6 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.FileKey import ChannelProcKey, ProcessingFileKey -from legenddataflow.log import build_log from legendmeta import LegendMetadata from 
pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 @@ -21,6 +19,9 @@ from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data +from ..FileKey import ChannelProcKey, ProcessingFileKey +from ..log import build_log + warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/scripts/pars_pht_partcal.py b/workflow/src/legenddataflow/scripts/pars_pht_partcal.py similarity index 99% rename from workflow/scripts/pars_pht_partcal.py rename to workflow/src/legenddataflow/scripts/pars_pht_partcal.py index b3e43c4..7bd8f66 100644 --- a/workflow/scripts/pars_pht_partcal.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_partcal.py @@ -13,14 +13,15 @@ import pygama.math.histogram as pgh from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.FileKey import ChannelProcKey, ProcessingFileKey -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.math.distributions import nb_poly from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data +from ..FileKey import ChannelProcKey, ProcessingFileKey +from ..log import build_log + warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/workflow/scripts/pars_pht_qc.py b/workflow/src/legenddataflow/scripts/pars_pht_qc.py similarity index 99% rename from workflow/scripts/pars_pht_qc.py rename to workflow/src/legenddataflow/scripts/pars_pht_qc.py index 2ad477a..e1cf4dd 100644 --- a/workflow/scripts/pars_pht_qc.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_qc.py @@ -11,8 +11,6 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.convert_np import convert_dict_np_to_float -from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -22,6 +20,9 @@ ) from pygama.pargen.utils import load_data +from ..convert_np import convert_dict_np_to_float +from ..log import build_log + log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/scripts/pars_pht_qc_phy.py b/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py similarity index 98% rename from workflow/scripts/pars_pht_qc_phy.py rename to workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py index 791fa2b..b48211f 100644 --- a/workflow/scripts/pars_pht_qc_phy.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py @@ -12,8 +12,6 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.convert_np import convert_dict_np_to_float -from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -21,6 +19,9 @@ get_keys, ) +from ..convert_np import convert_dict_np_to_float +from ..log import build_log + log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/scripts/pars_tcm_pulser.py b/workflow/src/legenddataflow/scripts/pars_tcm_pulser.py similarity index 97% rename from workflow/scripts/pars_tcm_pulser.py rename to workflow/src/legenddataflow/scripts/pars_tcm_pulser.py index 56700ec..5325dc1 100644 --- a/workflow/scripts/pars_tcm_pulser.py +++ 
b/workflow/src/legenddataflow/scripts/pars_tcm_pulser.py @@ -6,10 +6,11 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from ..log import build_log + argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) argparser.add_argument("--metadata", help="metadata", type=str, required=True) diff --git a/workflow/scripts/write_filelist.py b/workflow/src/legenddataflow/scripts/write_filelist.py similarity index 100% rename from workflow/scripts/write_filelist.py rename to workflow/src/legenddataflow/scripts/write_filelist.py From 69b13ab854090e2e7c9fa38f8b330a86689cd15b Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 16:08:03 +0100 Subject: [PATCH 071/101] move package version spec to pyproject.toml and implement --editable --- config-lngs.yaml | 7 ------ pyproject.toml | 18 +++++++-------- workflow/src/legenddataflow/execenv.py | 31 +++++++++----------------- 3 files changed, 18 insertions(+), 38 deletions(-) diff --git a/config-lngs.yaml b/config-lngs.yaml index 356aa4e..5b04441 100644 --- a/config-lngs.yaml +++ b/config-lngs.yaml @@ -69,11 +69,4 @@ execenv: PYGAMA_FASTMATH: "false" DISABLE_TQDM: "True" -pkg_versions: - - pygama==2.0.* - - pylegendmeta==1.2.0a2 - - dspeed==1.6.* - - legend-pydataobj>=1.11.4 - - legend-daq2lh5==1.4.* - legend_metadata_version: v0.5.7 diff --git a/pyproject.toml b/pyproject.toml index 27e62e5..97b612a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,30 +52,28 @@ readme = "README.md" requires-python = ">=3.11" dynamic = ["version"] -# these are the dependencies strictly required by legend-dataflow dependencies = [ "colorlog", "dbetto>=1.0.5", + "pygama>=2", + "dspeed>=1.6", "pylegendmeta==1.2.0a2", + "legend-pydataobj>=1.11.4", + "legend-daq2lh5>=1.4", ] [project.optional-dependencies] # these are needed to run the data production -dataprod = [ +runprod = [ "snakemake>=8.16", - "pygama", - "dspeed", - "pylegendmeta==1.2.0a2", - "legend-pydataobj", - "legend-daq2lh5", ] test = [ - "legend_dataflow[dataprod]", + "legend_dataflow[runprod]", "pytest>=6", "pytest-cov>=3", ] dev = [ - "legend_dataflow[dataprod,test]", + "legend_dataflow[runprod,test]", "pre-commit", ] docs = [ @@ -88,4 +86,4 @@ docs = [ ] [project.scripts] -dataprod = "legenddataflow.library.execenv:dataprod" +dataprod = "legenddataflow.execenv:dataprod" diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index ad54d11..6fdde1d 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -11,7 +11,6 @@ import colorlog import dbetto from dbetto import AttrsDict -from packaging.requirements import Requirement from . 
import utils @@ -102,6 +101,12 @@ def dataprod() -> None: help="remove software directory before installing software", action="store_true", ) + parser_install.add_argument( + "-e", + "--editable", + help="install software with pip's --editable flag", + action="store_true", + ) parser_install.set_defaults(func=install) parser_exec = subparsers.add_parser( @@ -218,25 +223,7 @@ def _runcmd(cmd_expr, cmd_env, **kwargs): log.info("installing uv") _runcmd(cmd_expr, cmd_env) - # now packages - - path_src = Path(config_dict.paths.src) - pkg_list = [] - for spec in config_dict.pkg_versions: - pkg = Requirement(spec).name - if (path_src / pkg).exists(): - pkg_list.append(str(path_src / pkg)) - else: - pkg_list.append(spec) - - cmd_base = [*python, "-m", "uv", "pip", "--no-cache", "install"] - - cmd_expr = cmd_base + pkg_list - - log.info("installing packages") - _runcmd(cmd_expr, cmd_env) - - # and finally legenddataflow + # and finally install legenddataflow with all dependencies cmd_expr = [ *python, @@ -245,10 +232,12 @@ def _runcmd(cmd_expr, cmd_env, **kwargs): "pip", "--no-cache", "install", - # "--editable", # TODO do we really want this? str(config_loc), ] + if args.editable: + cmd_expr.insert(-1, "--editable") + log.info("installing packages") _runcmd(cmd_expr, cmd_env) From 8e5e4a65187bbd541f29c8d40336ea466295a0d4 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 16:50:21 +0100 Subject: [PATCH 072/101] fix pre-commit config --- .pre-commit-config.yaml | 13 +- pyproject.toml | 78 ++++++++++- tests/test_util.py | 25 ++-- workflow/src/legenddataflow/FileKey.py | 12 +- workflow/src/legenddataflow/cal_grouping.py | 16 ++- workflow/src/legenddataflow/convert_np.py | 3 +- .../src/legenddataflow/create_pars_keylist.py | 13 +- workflow/src/legenddataflow/execenv.py | 16 ++- workflow/src/legenddataflow/pars_loading.py | 8 +- workflow/src/legenddataflow/patterns.py | 60 +++++++-- .../scripts/blinding_calibration.py | 10 +- .../src/legenddataflow/scripts/build_dsp.py | 17 ++- .../src/legenddataflow/scripts/build_evt.py | 8 +- .../src/legenddataflow/scripts/build_fdb.py | 4 +- .../src/legenddataflow/scripts/build_hit.py | 4 +- .../legenddataflow/scripts/build_raw_blind.py | 32 +++-- .../legenddataflow/scripts/build_raw_orca.py | 36 ++++-- .../src/legenddataflow/scripts/build_skm.py | 4 +- .../legenddataflow/scripts/check_blinding.py | 21 ++- .../legenddataflow/scripts/complete_run.py | 48 +++++-- .../legenddataflow/scripts/merge_channels.py | 8 +- .../legenddataflow/scripts/par_psp_geds.py | 24 +++- .../scripts/pars_dsp_build_svm_geds.py | 4 +- .../scripts/pars_dsp_dplms_geds.py | 22 +++- .../scripts/pars_dsp_eopt_geds.py | 24 +++- .../scripts/pars_dsp_evtsel_geds.py | 91 ++++++++++--- .../scripts/pars_dsp_nopt_geds.py | 8 +- .../scripts/pars_dsp_tau_geds.py | 18 ++- .../legenddataflow/scripts/pars_hit_aoe.py | 11 +- .../legenddataflow/scripts/pars_hit_ecal.py | 85 ++++++++---- .../src/legenddataflow/scripts/pars_hit_qc.py | 44 +++++-- .../legenddataflow/scripts/pars_pht_aoecal.py | 64 ++++++--- .../legenddataflow/scripts/pars_pht_fast.py | 28 +++- .../legenddataflow/scripts/pars_pht_lqcal.py | 38 ++++-- .../scripts/pars_pht_partcal.py | 121 +++++++++++------- .../src/legenddataflow/scripts/pars_pht_qc.py | 31 ++++- .../legenddataflow/scripts/pars_pht_qc_phy.py | 26 +++- .../legenddataflow/scripts/pars_tcm_pulser.py | 4 +- workflow/src/legenddataflow/utils.py | 23 +++- 39 files changed, 817 insertions(+), 285 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml 
index e369b65..0611a74 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,6 +4,12 @@ ci: autofix_commit_msg: "style: pre-commit fixes" repos: + - repo: https://github.com/adamchainz/blacken-docs + rev: "1.19.1" + hooks: + - id: blacken-docs + additional_dependencies: [black==24.*] + - repo: https://github.com/pre-commit/pre-commit-hooks rev: "v5.0.0" hooks: @@ -24,17 +30,12 @@ repos: - id: requirements-txt-fixer - id: trailing-whitespace - - repo: https://github.com/psf/black - rev: "24.10.0" - hooks: - - id: black-jupyter - args: ["--line-length", "99"] - - repo: https://github.com/astral-sh/ruff-pre-commit rev: "v0.8.6" hooks: - id: ruff args: ["--fix", "--show-fixes"] + - id: ruff-format - repo: https://github.com/abravalheri/validate-pyproject rev: "v0.23" diff --git a/pyproject.toml b/pyproject.toml index 97b612a..807e71b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,3 @@ -[tool.uv.workspace] -exclude = ["generated", "inputs", "software", "workflow"] - [build-system] requires = [ "setuptools>=61.2", @@ -87,3 +84,78 @@ docs = [ [project.scripts] dataprod = "legenddataflow.execenv:dataprod" + +[tool.uv.workspace] +exclude = ["generated", "inputs", "software", "workflow"] + +[tool.uv] +dev-dependencies = [ + "legend-dataflow[test]", +] + +[tool.pytest.ini_options] +minversion = "6.0" +addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] +xfail_strict = true +filterwarnings = [ + "error", +] +log_cli_level = "INFO" +testpaths = [ + "tests", +] + +[tool.ruff] +src = ["workflow/src"] + +[tool.ruff.lint] +extend-select = [ + "ARG", # flake8-unused-arguments + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "EM", # flake8-errmsg + "EXE", # flake8-executable + "G", # flake8-logging-format + "I", # isort + "ICN", # flake8-import-conventions + "NPY", # NumPy specific rules + "PD", # pandas-vet + "PGH", # pygrep-hooks + "PIE", # flake8-pie + "PL", # pylint + "PT", # flake8-pytest-style + "PTH", # flake8-use-pathlib + "RET", # flake8-return + "RUF", # Ruff-specific + "SIM", # flake8-simplify + "T20", # flake8-print + "UP", # pyupgrade + "YTT", # flake8-2020 +] +ignore = [ + "PT011", # `pytest.raises(ValueError)` is too broad + "PLR09", # Too many <...> + "PLR2004", # Magic value used in comparison + "ISC001", # Conflicts with formatter +] +isort.required-imports = ["from __future__ import annotations"] + +[tool.ruff.lint.per-file-ignores] +"tests/**" = ["T20"] +"noxfile.py" = ["T20"] + + +[tool.pylint] +py-version = "3.9" +ignore-paths = [".*/_version.py"] +reports.output-format = "colorized" +similarities.ignore-imports = "yes" +messages_control.disable = [ + "design", + "fixme", + "line-too-long", + "missing-module-docstring", + "missing-function-docstring", + "wrong-import-position", + "too-many-nested-blocks" +] diff --git a/tests/test_util.py b/tests/test_util.py index acaf609..c1654e0 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -39,7 +39,9 @@ def test_filekey(): assert key.name == "l200-p00-r000-cal-*" key = FileKey.from_string("l200-p00-r000-cal-20230101T123456Z") assert key.name == "l200-p00-r000-cal-20230101T123456Z" - key = FileKey.get_filekey_from_filename("l200-p00-r000-cal-20230101T123456Z-tier_dsp.lh5") + key = FileKey.get_filekey_from_filename( + "l200-p00-r000-cal-20230101T123456Z-tier_dsp.lh5" + ) assert key.name == "l200-p00-r000-cal-20230101T123456Z" assert ( key.get_path_from_filekey(get_pattern_tier_dsp(setup))[0] @@ -57,7 +59,8 @@ def test_filekey(): def test_create_pars_keylist(): key1 = 
FileKey("l200", "p00", "r000", "cal", "20230101T123456Z") assert ( - pars_key_resolve.from_filekey(key1, {"cal": ["par_dsp"]}).valid_from == "20230101T123456Z" + pars_key_resolve.from_filekey(key1, {"cal": ["par_dsp"]}).valid_from + == "20230101T123456Z" ) key2 = FileKey("l200", "p00", "r000", "cal", "20230102T123456Z") assert pars_key_resolve.match_keys(key1, key2) == key1 @@ -68,7 +71,9 @@ def test_create_pars_keylist(): pkey2 = pars_key_resolve.from_filekey( FileKey("l200", "p00", "r000", "lar", "20230102T123456Z"), {"lar": ["par_dsp"]} ) - assert pkey2.apply == ["lar/p00/r000/l200-p00-r000-lar-20230102T123456Z-par_dsp.json"] + assert pkey2.apply == [ + "lar/p00/r000/l200-p00-r000-lar-20230102T123456Z-par_dsp.json" + ] pars_key_resolve.match_entries(pkey1, pkey2) assert set(pkey2.apply) == { "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", @@ -95,9 +100,9 @@ def test_create_pars_keylist(): pkeylist = pars_key_resolve.generate_par_keylist(keylist) assert pkeylist == keylist assert set( - pars_key_resolve.match_all_entries(pkeylist, {"cal": ["par_dsp"], "lar": ["par_dsp"]})[ - 1 - ].apply + pars_key_resolve.match_all_entries( + pkeylist, {"cal": ["par_dsp"], "lar": ["par_dsp"]} + )[1].apply ) == { "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", "lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.json", @@ -108,7 +113,9 @@ def test_pars_loading(): pars_files = CalibCatalog.get_calib_files( Path(par_dsp_path(setup)) / "validity.jsonl", "20230101T123456Z" ) - assert pars_files == ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"] + assert pars_files == [ + "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json" + ] par_override_files = CalibCatalog.get_calib_files( Path(par_overwrite_path(setup)) / "dsp" / "validity.jsonl", "20230101T123456Z" @@ -118,7 +125,9 @@ def test_pars_loading(): pars_files, par_override_files ) - assert pars_files == ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"] + assert pars_files == [ + "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json" + ] assert set(pars_catalog.get_par_file(setup, "20230101T123456Z", "dsp")) == { ( diff --git a/workflow/src/legenddataflow/FileKey.py b/workflow/src/legenddataflow/FileKey.py index 63a1842..c11e6e5 100644 --- a/workflow/src/legenddataflow/FileKey.py +++ b/workflow/src/legenddataflow/FileKey.py @@ -31,7 +31,9 @@ def regex_from_filepattern(filepattern): return "".join(f) -class FileKey(namedtuple("FileKey", ["experiment", "period", "run", "datatype", "timestamp"])): +class FileKey( + namedtuple("FileKey", ["experiment", "period", "run", "datatype", "timestamp"]) +): __slots__ = () re_pattern = "(-(?P[^-]+)(\\-(?P[^-]+)(\\-(?P[^-]+)(\\-(?P[^-]+)(\\-(?P[^-]+))?)?)?)?)?$" @@ -110,7 +112,9 @@ def get_path_from_filekey(self, pattern, **kwargs): for entry, value in kwargs.items(): if isinstance(value, dict): if len(next(iter(set(value).intersection(self._list())))) > 0: - kwargs[entry] = value[next(iter(set(value).intersection(self._list())))] + kwargs[entry] = value[ + next(iter(set(value).intersection(self._list()))) + ] else: kwargs.pop(entry) return self.expand(pattern, **kwargs) @@ -185,7 +189,9 @@ def get_path_from_filekey(self, pattern, **kwargs): for entry, value in kwargs.items(): if isinstance(value, dict): if len(next(iter(set(value).intersection(self._list())))) > 0: - kwargs[entry] = value[next(iter(set(value).intersection(self._list())))] + kwargs[entry] = value[ + next(iter(set(value).intersection(self._list()))) + ] else: 
kwargs.pop(entry) return self.expand(pattern, **kwargs) diff --git a/workflow/src/legenddataflow/cal_grouping.py b/workflow/src/legenddataflow/cal_grouping.py index e41d5c7..ce06c1d 100644 --- a/workflow/src/legenddataflow/cal_grouping.py +++ b/workflow/src/legenddataflow/cal_grouping.py @@ -79,9 +79,9 @@ def get_par_files( for par_file in par_files: if ( par_file.split("-")[-1] - == str(get_pattern_pars(self.setup, tier, check_in_cycle=False).name).split( - "-" - )[-1] + == str( + get_pattern_pars(self.setup, tier, check_in_cycle=False).name + ).split("-")[-1] ): all_par_files.append(par_file) if channel == "default": @@ -132,9 +132,9 @@ def get_plt_files( for par_file in par_files: if ( par_file.split("-")[-1] - == str(get_pattern_pars(self.setup, tier, check_in_cycle=False).name).split( - "-" - )[-1] + == str( + get_pattern_pars(self.setup, tier, check_in_cycle=False).name + ).split("-")[-1] ): all_par_files.append(par_file) if channel == "default": @@ -190,7 +190,9 @@ def get_log_file( fk.channel = channel return fk.get_path_from_filekey(get_pattern_log_channel(self.setup, name))[0] - def get_timestamp(self, catalog, dataset, channel, tier, experiment="l200", datatype="cal"): + def get_timestamp( + self, catalog, dataset, channel, tier, experiment="l200", datatype="cal" + ): par_files = self.get_par_files( catalog, dataset, diff --git a/workflow/src/legenddataflow/convert_np.py b/workflow/src/legenddataflow/convert_np.py index cdc363c..dbd8978 100644 --- a/workflow/src/legenddataflow/convert_np.py +++ b/workflow/src/legenddataflow/convert_np.py @@ -9,6 +9,7 @@ def convert_dict_np_to_float(dic): dic[key] = float(dic[key]) elif isinstance(dic[key], (list, tuple)): dic[key] = [ - float(x) if isinstance(x, (np.float32, np.float64)) else x for x in dic[key] + float(x) if isinstance(x, (np.float32, np.float64)) else x + for x in dic[key] ] return dic diff --git a/workflow/src/legenddataflow/create_pars_keylist.py b/workflow/src/legenddataflow/create_pars_keylist.py index a82ef0c..9325a6d 100644 --- a/workflow/src/legenddataflow/create_pars_keylist.py +++ b/workflow/src/legenddataflow/create_pars_keylist.py @@ -15,7 +15,6 @@ class ParsKeyResolve: - def __init__(self, valid_from, category, apply): self.valid_from = valid_from self.category = category @@ -47,7 +46,9 @@ def write_to_jsonl(file_names, path): @staticmethod def write_to_yaml(file_names, path): with Path(path).open("w") as of: - yaml.dump([file_name.__dict__ for file_name in file_names], of, sort_keys=False) + yaml.dump( + [file_name.__dict__ for file_name in file_names], of, sort_keys=False + ) @staticmethod def match_keys(key1, key2): @@ -79,7 +80,9 @@ def generate_par_keylist(keys): @staticmethod def match_entries(entry1, entry2): - datatype2 = ProcessingFileKey.get_filekey_from_filename(entry2.apply[0]).datatype + datatype2 = ProcessingFileKey.get_filekey_from_filename( + entry2.apply[0] + ).datatype for entry in entry1.apply: if ProcessingFileKey.get_filekey_from_filename(entry).datatype == datatype2: pass @@ -105,7 +108,9 @@ def get_keys(keypart, search_pattern): else: wildcard_dict = d._asdict() try: - tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(str(search_pattern))) + tier_pattern_rx = re.compile( + smk.io.regex_from_filepattern(str(search_pattern)) + ) except AttributeError: tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) fn_glob_pattern = smk.io.expand(search_pattern, **wildcard_dict)[0] diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index 
6fdde1d..6a0239d 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -88,13 +88,17 @@ def dataprod() -> None: prog="dataprod", description="dataprod's command-line interface" ) - parser.add_argument("-v", "--verbose", help="increase verbosity", action="store_true") + parser.add_argument( + "-v", "--verbose", help="increase verbosity", action="store_true" + ) subparsers = parser.add_subparsers() parser_install = subparsers.add_parser( "install", help="install user software in data production environment" ) - parser_install.add_argument("config_file", help="production cycle configuration file") + parser_install.add_argument( + "config_file", help="production cycle configuration file" + ) parser_install.add_argument( "-r", "--remove", @@ -112,7 +116,9 @@ def dataprod() -> None: parser_exec = subparsers.add_parser( "exec", help="load data production environment and execute a given command" ) - parser_exec.add_argument("config_file", help="production cycle configuration file", type=str) + parser_exec.add_argument( + "config_file", help="production cycle configuration file", type=str + ) parser_exec.add_argument( "command", help="command to run within the container", type=str, nargs="+" ) @@ -123,7 +129,9 @@ def dataprod() -> None: if args.verbose: handler = colorlog.StreamHandler() handler.setFormatter( - colorlog.ColoredFormatter("%(log_color)s%(name)s [%(levelname)s] %(message)s") + colorlog.ColoredFormatter( + "%(log_color)s%(name)s [%(levelname)s] %(message)s" + ) ) logger = logging.getLogger("legenddataflow") diff --git a/workflow/src/legenddataflow/pars_loading.py b/workflow/src/legenddataflow/pars_loading.py index 80f54a6..bd23011 100644 --- a/workflow/src/legenddataflow/pars_loading.py +++ b/workflow/src/legenddataflow/pars_loading.py @@ -20,7 +20,10 @@ def match_pars_files(filelist1, filelist2): fk2 = ProcessingFileKey.get_filekey_from_pattern(file2) for j, file1 in enumerate(filelist1): fk1 = ProcessingFileKey.get_filekey_from_pattern(file1) - if fk1.processing_step == fk2.processing_step and fk1.datatype == fk2.datatype: + if ( + fk1.processing_step == fk2.processing_step + and fk1.datatype == fk2.datatype + ): filelist1[j] = file2 if len(filelist2) > 1: filelist2.remove(file2) @@ -41,7 +44,8 @@ def get_par_file(setup, timestamp, tier): pars_files = [Path(get_pars_path(setup, tier)) / file for file in pars_files] if len(pars_files_overwrite) > 0: pars_overwrite_files = [ - Path(par_overwrite_path(setup)) / tier / file for file in pars_files_overwrite + Path(par_overwrite_path(setup)) / tier / file + for file in pars_files_overwrite ] pars_files += pars_overwrite_files return pars_files diff --git a/workflow/src/legenddataflow/patterns.py b/workflow/src/legenddataflow/patterns.py index 28d27db..71f5db4 100644 --- a/workflow/src/legenddataflow/patterns.py +++ b/workflow/src/legenddataflow/patterns.py @@ -46,7 +46,9 @@ def processing_overwrite_pattern(): def full_channel_pattern(): - return "{experiment}-{period}-{run}-{datatype}-{timestamp}-{channel}-{processing_step}" + return ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}-{channel}-{processing_step}" + ) def full_channel_pattern_with_extension(): @@ -89,7 +91,10 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): / "{datatype}" / "{period}" / "{run}" - / ("{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5") + / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + + f"{tier}.lh5" + ) ) elif tier in ["evt_concat", "pet_concat"]: 
file_pattern = ( @@ -107,7 +112,10 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): else: msg = "invalid tier" raise Exception(msg) - if tier_path(setup) not in str(file_pattern.resolve(strict=False)) and check_in_cycle is True: + if ( + tier_path(setup) not in str(file_pattern.resolve(strict=False)) + and check_in_cycle is True + ): return "/tmp/" + file_pattern.name else: return file_pattern @@ -132,7 +140,10 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr / "cal" / "{period}" / "{run}" - / ("{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}") + / ( + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}.{extension}" + ) ) else: msg = "invalid tier" @@ -142,7 +153,10 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr and check_in_cycle is True ): if name is None: - return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{extension}" + return ( + "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + + f"par_{tier}.{extension}" + ) else: return ( "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" @@ -160,7 +174,10 @@ def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): / "cal" / "{period}" / "{run}" - / ("{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{ext}") + / ( + "{experiment}-{period}-{run}-cal-{timestamp}-" + + f"par_{tier}_{name}.{ext}" + ) ) else: return ( @@ -206,7 +223,10 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" datatype = "{datatype}" if name is None: return Path(f"{tmp_par_path(setup)}") / ( - "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + f"{tier}.{extension}" + "{experiment}-{period}-{run}-" + + datatype + + "-{timestamp}-par_" + + f"{tier}.{extension}" ) else: return Path(f"{tmp_par_path(setup)}") / ( @@ -220,7 +240,8 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: return Path(f"{tmp_par_path(setup)}") / ( - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}.{extension}" + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + + f"{tier}.{extension}" ) else: return Path(f"{tmp_par_path(setup)}") / ( @@ -236,7 +257,8 @@ def get_pattern_plts_tmp_channel(setup, tier, name=None): ) else: return Path(f"{tmp_plts_path(setup)}") / ( - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl" + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + + f"{tier}_{name}.pkl" ) @@ -257,7 +279,13 @@ def get_pattern_plts(setup, tier, name=None): / "cal" / "{period}" / "{run}" - / ("{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + "_" + name + ".dir") + / ( + "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + + tier + + "_" + + name + + ".dir" + ) ) @@ -265,7 +293,11 @@ def get_pattern_log(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / ("{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log") + / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}-" + + processing_step + + ".log" + ) ) @@ -273,7 +305,11 @@ def get_pattern_log_channel(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / ("{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log") + / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + + processing_step + + ".log" + ) 
) diff --git a/workflow/src/legenddataflow/scripts/blinding_calibration.py b/workflow/src/legenddataflow/scripts/blinding_calibration.py index 4a666cc..e4b79f2 100644 --- a/workflow/src/legenddataflow/scripts/blinding_calibration.py +++ b/workflow/src/legenddataflow/scripts/blinding_calibration.py @@ -50,7 +50,9 @@ # if chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["is_blinded"] is True: pars_dict = {} # peaks to search for -peaks_keV = np.array([238, 583.191, 727.330, 860.564, 1592.53, 1620.50, 2103.53, 2614.50]) +peaks_keV = np.array( + [238, 583.191, 727.330, 860.564, 1592.53, 1620.50, 2103.53, 2614.50] +) E_uncal = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[0].view_as("np") E_uncal = E_uncal[E_uncal > 200] @@ -98,7 +100,11 @@ ax.set_ylabel("counts") ax.set_yscale("log") ax2 = plt.subplot(212) -ax2.hist(E_uncal * roughpars[0], bins=np.arange(2600, 2630, 1 * roughpars[0]), histtype="step") +ax2.hist( + E_uncal * roughpars[0], + bins=np.arange(2600, 2630, 1 * roughpars[0]), + histtype="step", +) ax2.set_xlabel("energy (keV)") ax2.set_ylabel("counts") plt.suptitle(args.channel) diff --git a/workflow/src/legenddataflow/scripts/build_dsp.py b/workflow/src/legenddataflow/scripts/build_dsp.py index 137eb1f..7e44bb6 100644 --- a/workflow/src/legenddataflow/scripts/build_dsp.py +++ b/workflow/src/legenddataflow/scripts/build_dsp.py @@ -36,7 +36,9 @@ def replace_list_with_array(dic): argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--tier", help="Tier", type=str, required=True) -argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) +argparser.add_argument( + "--pars_file", help="database file for detector", nargs="*", default=[] +) argparser.add_argument("--input", help="input file", type=str) argparser.add_argument("--output", help="output file", type=str) @@ -74,7 +76,9 @@ def replace_list_with_array(dic): for chan, file in channel_dict.items() } db_files = [ - par_file for par_file in args.pars_file if Path(par_file).suffix in (".json", ".yaml", ".yml") + par_file + for par_file in args.pars_file + if Path(par_file).suffix in (".json", ".yaml", ".yml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) @@ -105,9 +109,12 @@ def replace_list_with_array(dic): key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") if args.tier in ["dsp", "psp"]: - - raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] - raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] + raw_channels = [ + channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel) + ] + raw_fields = [ + field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/") + ] outputs = {} channels = [] diff --git a/workflow/src/legenddataflow/scripts/build_evt.py b/workflow/src/legenddataflow/scripts/build_evt.py index b0bf2a4..b4723b4 100644 --- a/workflow/src/legenddataflow/scripts/build_evt.py +++ b/workflow/src/legenddataflow/scripts/build_evt.py @@ -53,7 +53,9 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): # load in config configs = TextDB(args.configs, lazy=True) if args.tier in ("evt", "pet"): - rule_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_evt"] + rule_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "tier_evt" + ] else: msg = "unknown tier" @@ -74,7 +76,9 @@ def 
find_matching_values_with_delay(arr1, arr2, jit_delay): exp_string = exp_string.replace( 'xtalk_matrix_filename=""', f'xtalk_matrix_filename="{args.xtc_file}"' ) - exp_string = exp_string.replace('cal_par_files=""', f"cal_par_files={args.par_files}") + exp_string = exp_string.replace( + 'cal_par_files=""', f"cal_par_files={args.par_files}" + ) exp_string2 = exp_string.replace('return_mode="energy"', 'return_mode="tcm_index"') file_path_config = { diff --git a/workflow/src/legenddataflow/scripts/build_fdb.py b/workflow/src/legenddataflow/scripts/build_fdb.py index f628341..93a3567 100644 --- a/workflow/src/legenddataflow/scripts/build_fdb.py +++ b/workflow/src/legenddataflow/scripts/build_fdb.py @@ -41,7 +41,9 @@ timestamps = np.zeros(len(fdb.df), dtype="float64") for i, row in enumerate(fdb.df.itertuples()): - store = lh5.LH5Store(base_path=f"{fdb.data_dir}/{fdb.tier_dirs['raw']}", keep_open=True) + store = lh5.LH5Store( + base_path=f"{fdb.data_dir}/{fdb.tier_dirs['raw']}", keep_open=True + ) # list of first timestamps for each channel loc_timestamps = np.full(len(row.raw_tables), fill_value=default, dtype="float64") diff --git a/workflow/src/legenddataflow/scripts/build_hit.py b/workflow/src/legenddataflow/scripts/build_hit.py index 4f31947..47b0fa0 100644 --- a/workflow/src/legenddataflow/scripts/build_hit.py +++ b/workflow/src/legenddataflow/scripts/build_hit.py @@ -27,7 +27,9 @@ configs = TextDB(args.configs, lazy=True) if args.tier == "hit" or args.tier == "pht": - config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_hit"] + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "tier_hit" + ] else: msg = "unknown tier" raise ValueError(msg) diff --git a/workflow/src/legenddataflow/scripts/build_raw_blind.py b/workflow/src/legenddataflow/scripts/build_raw_blind.py index ef704dd..3d42717 100644 --- a/workflow/src/legenddataflow/scripts/build_raw_blind.py +++ b/workflow/src/legenddataflow/scripts/build_raw_blind.py @@ -36,7 +36,9 @@ args = argparser.parse_args() configs = TextDB(args.configs, lazy=True) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "tier_raw" +] log = build_log(config_dict, args.log) @@ -53,19 +55,29 @@ # list of Ge channels and SiPM channels with associated metadata legendmetadata = LegendMetadata(args.metadata, lazy=True) ged_channels = ( - legendmetadata.channelmap(args.timestamp).map("system", unique=False)["geds"].map("daq.rawid") + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["geds"] + .map("daq.rawid") ) spms_channels = ( - legendmetadata.channelmap(args.timestamp).map("system", unique=False)["spms"].map("daq.rawid") + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["spms"] + .map("daq.rawid") ) auxs_channels = ( - legendmetadata.channelmap(args.timestamp).map("system", unique=False)["auxs"].map("daq.rawid") + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["auxs"] + .map("daq.rawid") ) blsn_channels = ( - legendmetadata.channelmap(args.timestamp).map("system", unique=False)["bsln"].map("daq.rawid") + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["bsln"] + .map("daq.rawid") ) puls_channels = ( - legendmetadata.channelmap(args.timestamp).map("system", unique=False)["puls"].map("daq.rawid") + legendmetadata.channelmap(args.timestamp) + .map("system", 
unique=False)["puls"] + .map("daq.rawid") ) store = lh5.LH5Store() @@ -88,7 +100,9 @@ # calibrate daq energy using pre existing curve daqenergy_cal = ne.evaluate( blind_curve["daqenergy_cal"]["expression"], - local_dict=dict(daqenergy=daqenergy, **blind_curve["daqenergy_cal"]["parameters"]), + local_dict=dict( + daqenergy=daqenergy, **blind_curve["daqenergy_cal"]["parameters"] + ), ) # figure out which event indices should be blinded @@ -148,7 +162,9 @@ # the rest should be the Ge and SiPM channels that need to be blinded # read in all of the data but only for the unblinded events - blinded_chobj, _ = store.read(channel + "/raw", args.input, idx=tokeep, decompress=False) + blinded_chobj, _ = store.read( + channel + "/raw", args.input, idx=tokeep, decompress=False + ) # now write the blinded data for this channel store.write_object( diff --git a/workflow/src/legenddataflow/scripts/build_raw_orca.py b/workflow/src/legenddataflow/scripts/build_raw_orca.py index 899c742..72b5ac6 100644 --- a/workflow/src/legenddataflow/scripts/build_raw_orca.py +++ b/workflow/src/legenddataflow/scripts/build_raw_orca.py @@ -25,7 +25,9 @@ Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "tier_raw" +] log = build_log(config_dict, args.log) @@ -40,7 +42,9 @@ ged_config = Props.read_from(channel_dict["geds_config"]) ged_channels = list( - chmap.channelmaps.on(args.timestamp).map("system", unique=False)["geds"].map("daq.rawid") + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["geds"] + .map("daq.rawid") ) ged_config[next(iter(ged_config))]["geds"]["key_list"] = sorted(ged_channels) @@ -50,7 +54,9 @@ spm_config = Props.read_from(channel_dict["spms_config"]) spm_channels = list( - chmap.channelmaps.on(args.timestamp).map("system", unique=False)["spms"].map("daq.rawid") + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["spms"] + .map("daq.rawid") ) spm_config[next(iter(spm_config))]["spms"]["key_list"] = sorted(spm_channels) @@ -59,25 +65,37 @@ if "auxs_config" in list(channel_dict): aux_config = Props.read_from(channel_dict["auxs_config"]) aux_channels = list( - chmap.channelmaps.on(args.timestamp).map("system", unique=False)["auxs"].map("daq.rawid") + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["auxs"] + .map("daq.rawid") ) aux_channels += list( - chmap.channelmaps.on(args.timestamp).map("system", unique=False)["puls"].map("daq.rawid") + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["puls"] + .map("daq.rawid") ) aux_channels += list( - chmap.channelmaps.on(args.timestamp).map("system", unique=False)["bsln"].map("daq.rawid") + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["bsln"] + .map("daq.rawid") ) top_key = next(iter(aux_config)) - aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted(aux_channels) + aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted( + aux_channels + ) Props.add_to(all_config, aux_config) if "muon_config" in list(channel_dict): muon_config = Props.read_from(channel_dict["muon_config"]) muon_channels = list( - chmap.channelmaps.on(args.timestamp).map("system", unique=False)["muon"].map("daq.rawid") + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["muon"] + .map("daq.rawid") ) top_key = 
next(iter(muon_config)) - muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted(muon_channels) + muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted( + muon_channels + ) Props.add_to(all_config, muon_config) rng = np.random.default_rng() diff --git a/workflow/src/legenddataflow/scripts/build_skm.py b/workflow/src/legenddataflow/scripts/build_skm.py index 0463c61..9411b1b 100644 --- a/workflow/src/legenddataflow/scripts/build_skm.py +++ b/workflow/src/legenddataflow/scripts/build_skm.py @@ -59,7 +59,9 @@ def get_all_out_fields(input_table, out_fields, current_field=""): if isinstance(ptr1[items[-1]], Table): out_fields.remove(field) - out_fields = get_all_out_fields(ptr1[items[-1]], out_fields, current_field=field) + out_fields = get_all_out_fields( + ptr1[items[-1]], out_fields, current_field=field + ) # remove unwanted columns out_table_skm = Table(size=len(out_table)) diff --git a/workflow/src/legenddataflow/scripts/check_blinding.py b/workflow/src/legenddataflow/scripts/check_blinding.py index 37bf4e9..faf800d 100644 --- a/workflow/src/legenddataflow/scripts/check_blinding.py +++ b/workflow/src/legenddataflow/scripts/check_blinding.py @@ -29,7 +29,9 @@ argparser.add_argument("--files", help="files", nargs="*", type=str) argparser.add_argument("--output", help="output file", type=str) argparser.add_argument("--plot_file", help="plot file", type=str) -argparser.add_argument("--blind_curve", help="blinding curves file", nargs="*", type=str) +argparser.add_argument( + "--blind_curve", help="blinding curves file", nargs="*", type=str +) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--configs", help="config file", type=str) @@ -44,14 +46,18 @@ log = build_log(config_dict, args.log) # get the usability status for this channel -chmap = LegendMetadata(args.metadata, lazy=True).channelmap(args.timestamp).map("daq.rawid") +chmap = ( + LegendMetadata(args.metadata, lazy=True).channelmap(args.timestamp).map("daq.rawid") +) det_status = chmap[int(args.channel[2:])]["analysis"]["is_blinded"] # read in calibration curve for this channel blind_curve = Props.read_from(args.blind_curve)[args.channel]["pars"]["operations"] # load in the data -daqenergy = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[0].view_as("np") +daqenergy = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[0].view_as( + "np" +) # calibrate daq energy using pre existing curve daqenergy_cal = ne.evaluate( @@ -86,11 +92,16 @@ # check for peaks within +- 5keV of 2614 and 583 to ensure blinding still # valid and if so create file else raise error. 
if detector is in ac mode it # will always pass this check -if (np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5)) or det_status is False: +if ( + np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) +) or det_status is False: Path(args.output).mkdir(parents=True, exist_ok=True) Props.write_to( args.output, - {"threshold_adc": np.nanmin(daqenergy), "threshold_kev": np.nanmin(daqenergy_cal)}, + { + "threshold_adc": np.nanmin(daqenergy), + "threshold_kev": np.nanmin(daqenergy_cal), + }, ) else: msg = "peaks not found in daqenergy" diff --git a/workflow/src/legenddataflow/scripts/complete_run.py b/workflow/src/legenddataflow/scripts/complete_run.py index 1223c5c..e3892eb 100644 --- a/workflow/src/legenddataflow/scripts/complete_run.py +++ b/workflow/src/legenddataflow/scripts/complete_run.py @@ -50,9 +50,13 @@ def check_log_files(log_path, output_file, gen_output, warning_file=None): Path(file).unlink() text = None if n_errors == 0: - f.write(f"{gen_output} successfully generated at {now} with no errors \n") + f.write( + f"{gen_output} successfully generated at {now} with no errors \n" + ) if n_warnings == 0: - w.write(f"{gen_output} successfully generated at {now} with no warnings \n") + w.write( + f"{gen_output} successfully generated at {now} with no warnings \n" + ) else: with Path(output_file).open("w") as f: n_errors = 0 @@ -73,7 +77,9 @@ def check_log_files(log_path, output_file, gen_output, warning_file=None): Path(file).unlink() text = None if n_errors == 0: - f.write(f"{gen_output} successfully generated at {now} with no errors \n") + f.write( + f"{gen_output} successfully generated at {now} with no errors \n" + ) walk = list(os.walk(log_path)) for path, _, _ in walk[::-1]: if len(os.listdir(path)) == 0: @@ -139,7 +145,9 @@ def build_valid_keys(input_files_regex, output_dir): for key in list(key_dict): dtype = key.split("-")[-1] - out_file = Path(output_dir) / f'{key.replace(f"-{dtype}", "")}-valid_{dtype}.json' + out_file = ( + Path(output_dir) / f'{key.replace(f"-{dtype}", "")}-valid_{dtype}.json' + ) out_file.parent.mkdir(parents=True, exist_ok=True) if Path(out_file).is_file(): out_dict = Props.read_from([out_file] + key_dict[key]) @@ -163,7 +171,9 @@ def find_gen_runs(gen_tier_path): # then look for concat tiers (use filenames now) paths_concat = gen_tier_path.glob("*/*/*.lh5") # use the directories to build a datatype/period/run string - runs_concat = {"/".join([str(p).split("-")[3]] + str(p).split("-")[1:3]) for p in paths_concat} + runs_concat = { + "/".join([str(p).split("-")[3]] + str(p).split("-")[1:3]) for p in paths_concat + } return runs | runs_concat @@ -186,7 +196,10 @@ def build_file_dbs(gen_tier_path, outdir): outdir.mkdir(parents=True, exist_ok=True) # TODO: replace l200 with {experiment} outfile = outdir / f"l200-{speck[1]}-{speck[2]}-{speck[0]}-filedb.h5" - logfile = Path(ut.tmp_log_path(snakemake.params.setup)) / outfile.with_suffix(".log").name + logfile = ( + Path(ut.tmp_log_path(snakemake.params.setup)) + / outfile.with_suffix(".log").name + ) print(f"INFO: ......building {outfile}") cmdline = [ @@ -223,7 +236,11 @@ def build_file_dbs(gen_tier_path, outdir): for p in processes: if p.returncode != 0: - _cmdline = " ".join([f"{k}={v}" for k, v in cmdenv.items()]) + " " + " ".join(p.args) + _cmdline = ( + " ".join([f"{k}={v}" for k, v in cmdenv.items()]) + + " " + + " ".join(p.args) + ) msg = f"at least one FileDB building thread failed: {_cmdline}" raise RuntimeError(msg) @@ -234,11 +251,16 @@ def build_file_dbs(gen_tier_path, 
outdir): file_db_config = {} -if os.getenv("PRODENV") is not None and os.getenv("PRODENV") in snakemake.params.filedb_path: +if ( + os.getenv("PRODENV") is not None + and os.getenv("PRODENV") in snakemake.params.filedb_path +): prodenv = as_ro(os.getenv("PRODENV")) def tdirs(tier): - return as_ro(ut.get_tier_path(snakemake.params.setup, tier)).replace(prodenv, "") + return as_ro(ut.get_tier_path(snakemake.params.setup, tier)).replace( + prodenv, "" + ) file_db_config["data_dir"] = "$PRODENV" @@ -251,11 +273,15 @@ def tdirs(tier): file_db_config["data_dir"] = "/" -file_db_config["tier_dirs"] = {k: tdirs(k) for k in snakemake.params.setup["table_format"]} +file_db_config["tier_dirs"] = { + k: tdirs(k) for k in snakemake.params.setup["table_format"] +} def fformat(tier): - abs_path = patterns.get_pattern_tier(snakemake.params.setup, tier, check_in_cycle=False) + abs_path = patterns.get_pattern_tier( + snakemake.params.setup, tier, check_in_cycle=False + ) return str(abs_path).replace(ut.get_tier_path(snakemake.params.setup, tier), "") diff --git a/workflow/src/legenddataflow/scripts/merge_channels.py b/workflow/src/legenddataflow/scripts/merge_channels.py index d0f90f7..d6fec7a 100644 --- a/workflow/src/legenddataflow/scripts/merge_channels.py +++ b/workflow/src/legenddataflow/scripts/merge_channels.py @@ -86,7 +86,6 @@ def replace_path(d, old_path, new_path): if chmap is not None: channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" else: - channel_name = fkey.channel out_dict[channel_name] = channel_dict else: @@ -104,7 +103,6 @@ def replace_path(d, old_path, new_path): if chmap is not None: channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" else: - channel_name = fkey.channel out_dict[channel_name] = channel_dict @@ -123,7 +121,6 @@ def replace_path(d, old_path, new_path): if chmap is not None: channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" else: - channel_name = fkey.channel if isinstance(channel_dict, dict) and "common" in list(channel_dict): chan_common_dict = channel_dict.pop("common") @@ -142,7 +139,6 @@ def replace_path(d, old_path, new_path): if chmap is not None: channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" else: - channel_name = fkey.channel tb_in = lh5.read(f"{channel_name}", channel) @@ -153,7 +149,9 @@ def replace_path(d, old_path, new_path): wo_mode="a", ) if args.in_db: - db_dict[channel_name] = replace_path(db_dict[channel_name], channel, args.output) + db_dict[channel_name] = replace_path( + db_dict[channel_name], channel, args.output + ) else: msg = "Output file extension does not match input file extension" raise RuntimeError(msg) diff --git a/workflow/src/legenddataflow/scripts/par_psp_geds.py b/workflow/src/legenddataflow/scripts/par_psp_geds.py index a765e64..e65903c 100644 --- a/workflow/src/legenddataflow/scripts/par_psp_geds.py +++ b/workflow/src/legenddataflow/scripts/par_psp_geds.py @@ -16,13 +16,21 @@ argparser = argparse.ArgumentParser() -argparser.add_argument("--input", help="input files", nargs="*", type=str, required=True) -argparser.add_argument("--output", help="output file", nargs="*", type=str, required=True) -argparser.add_argument("--in_plots", help="input plot files", nargs="*", type=str, required=False) +argparser.add_argument( + "--input", help="input files", nargs="*", type=str, required=True +) +argparser.add_argument( + "--output", help="output file", nargs="*", type=str, required=True +) +argparser.add_argument( + "--in_plots", help="input plot files", nargs="*", type=str, required=False +) argparser.add_argument( 
"--out_plots", help="output plot files", nargs="*", type=str, required=False ) -argparser.add_argument("--in_obj", help="input object files", nargs="*", type=str, required=False) +argparser.add_argument( + "--in_obj", help="input object files", nargs="*", type=str, required=False +) argparser.add_argument( "--out_obj", help="output object files", nargs="*", type=str, required=False ) @@ -35,7 +43,9 @@ argparser.add_argument("--channel", help="Channel", type=str, required=True) args = argparser.parse_args() -configs = LegendMetadata(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +configs = LegendMetadata(args.configs, lazy=True).on( + args.timestamp, system=args.datatype +) merge_config = Props.read_from( configs["snakemake_rules"]["pars_psp"]["inputs"]["psp_config"][args.channel] ) @@ -96,7 +106,9 @@ val = val[key] fig = plt.figure() - plt.scatter([datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in in_dicts], vals) + plt.scatter( + [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in in_dicts], vals + ) plt.axhline(y=mean_val, color="r", linestyle="-") plt.xlabel("time") if unit is not None: diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py index f6c0878..a5310e9 100644 --- a/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py @@ -18,7 +18,9 @@ argparser.add_argument("--output_file", help="output SVM file", type=str, required=True) argparser.add_argument("--train_data", help="input data file", type=str, required=True) -argparser.add_argument("--train_hyperpars", help="input hyperparameter file", required=True) +argparser.add_argument( + "--train_hyperpars", help="input hyperparameter file", required=True +) args = argparser.parse_args() configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py index f7f878e..a47b653 100644 --- a/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py @@ -46,7 +46,9 @@ channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -configs = LegendMetadata(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +configs = LegendMetadata(args.configs, lazy=True).on( + args.timestamp, system=args.datatype +) dsp_config = config_dict["inputs"]["proc_chain"][args.channel] dplms_json = config_dict["inputs"]["dplms_pars"][args.channel] @@ -62,7 +64,9 @@ log.info("\nLoad fft data") energies = sto.read(f"{channel}/raw/daqenergy", fft_files)[0] idxs = np.where(energies.nda == 0)[0] - raw_fft = sto.read(f"{channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs)[0] + raw_fft = sto.read( + f"{channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs + )[0] t1 = time.time() log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") @@ -71,13 +75,17 @@ kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0][ + "peak" + ].nda ids = np.isin(peaks, peaks_rounded) peaks = peaks[ids] idx_list = 
[np.where(peaks == peak)[0] for peak in peaks_rounded] raw_cal = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] - log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}") + log.info( + f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}" + ) if isinstance(dsp_config, (str, list)): dsp_config = Props.read_from(dsp_config) @@ -107,9 +115,9 @@ coeffs = out_dict["dplms"].pop("coefficients") dplms_pars = Table(col_dict={"coefficients": Array(coeffs)}) - out_dict["dplms"][ - "coefficients" - ] = f"loadlh5('{args.lh5_path}', '{channel}/dplms/coefficients')" + out_dict["dplms"]["coefficients"] = ( + f"loadlh5('{args.lh5_path}', '{channel}/dplms/coefficients')" + ) log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") else: diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py index 1a6f2d1..c059961 100644 --- a/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py @@ -39,11 +39,15 @@ argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) -argparser.add_argument("--final_dsp_pars", help="final_dsp_pars", type=str, required=True) +argparser.add_argument( + "--final_dsp_pars", help="final_dsp_pars", type=str, required=True +) argparser.add_argument("--qbb_grid_path", help="qbb_grid_path", type=str) argparser.add_argument("--plot_path", help="plot_path", type=str) -argparser.add_argument("--plot_save_path", help="plot_save_path", type=str, required=False) +argparser.add_argument( + "--plot_save_path", help="plot_save_path", type=str, required=False +) args = argparser.parse_args() configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) @@ -104,7 +108,9 @@ ) peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0][ + "peak" + ].nda ids = np.isin(peaks, peaks_rounded) peaks = peaks[ids] idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] @@ -275,9 +281,15 @@ bopt_trap.lambda_param = lambda_param bopt_trap.add_dimension("etrap", "rise", 1, 12, True, "us") - bopt_cusp.add_initial_values(x_init=sample_x, y_init=sample_y_cusp, yerr_init=err_y_cusp) - bopt_zac.add_initial_values(x_init=sample_x, y_init=sample_y_zac, yerr_init=err_y_zac) - bopt_trap.add_initial_values(x_init=sample_x, y_init=sample_y_trap, yerr_init=err_y_trap) + bopt_cusp.add_initial_values( + x_init=sample_x, y_init=sample_y_cusp, yerr_init=err_y_cusp + ) + bopt_zac.add_initial_values( + x_init=sample_x, y_init=sample_y_zac, yerr_init=err_y_zac + ) + bopt_trap.add_initial_values( + x_init=sample_x, y_init=sample_y_trap, yerr_init=err_y_trap + ) best_idx = np.nanargmin(sample_y_cusp) bopt_cusp.optimal_results = results_cusp[best_idx] diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py index 1398256..2c01421 100644 --- a/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py @@ -71,7 +71,9 @@ def get_out_data( "baseline": lgdo.Array(raw_data["baseline"].nda[final_mask]), "daqenergy": lgdo.Array(raw_data["daqenergy"].nda[final_mask]), "daqenergy_cal": 
lgdo.Array(raw_data["daqenergy_cal"].nda[final_mask]), - "trapTmax_cal": lgdo.Array(dsp_data["trapTmax"].nda[final_mask] * ecal_pars), + "trapTmax_cal": lgdo.Array( + dsp_data["trapTmax"].nda[final_mask] * ecal_pars + ), "peak": lgdo.Array(np.full(len(np.where(final_mask)[0]), int(peak))), } ) @@ -81,11 +83,17 @@ def get_out_data( if __name__ == "__main__": argparser = argparse.ArgumentParser() argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) - argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) - argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) + argparser.add_argument( + "--tcm_filelist", help="tcm_filelist", type=str, required=False + ) + argparser.add_argument( + "--pulser_file", help="pulser_file", type=str, required=False + ) argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) - argparser.add_argument("--raw_cal", help="raw_cal", type=str, nargs="*", required=True) + argparser.add_argument( + "--raw_cal", help="raw_cal", type=str, nargs="*", required=True + ) argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--configs", help="configs", type=str, required=True) @@ -160,16 +168,23 @@ def get_out_data( if lh5_path[-1] != "/": lh5_path += "/" - raw_fields = [field.replace(lh5_path, "") for field in lh5.ls(raw_files[0], lh5_path)] + raw_fields = [ + field.replace(lh5_path, "") for field in lh5.ls(raw_files[0], lh5_path) + ] - tb = sto.read(lh5_path, raw_files, field_mask=["daqenergy", "t_sat_lo", "timestamp"])[0] + tb = sto.read( + lh5_path, raw_files, field_mask=["daqenergy", "t_sat_lo", "timestamp"] + )[0] discharges = tb["t_sat_lo"].nda > 0 discharge_timestamps = np.where(tb["timestamp"].nda[discharges])[0] is_recovering = np.full(len(tb), False, dtype=bool) for tstamp in discharge_timestamps: is_recovering = is_recovering | np.where( - (((tb["timestamp"].nda - tstamp) < 0.01) & ((tb["timestamp"].nda - tstamp) > 0)), + ( + ((tb["timestamp"].nda - tstamp) < 0.01) + & ((tb["timestamp"].nda - tstamp) > 0) + ), True, False, ) @@ -190,7 +205,9 @@ def get_out_data( masks[peak] = np.where(e_mask & (~is_recovering))[0] log.debug(f"{len(masks[peak])} events found in energy range for {peak}") - input_data = sto.read(f"{lh5_path}", raw_files, n_rows=10000, idx=np.where(~mask)[0])[0] + input_data = sto.read( + f"{lh5_path}", raw_files, n_rows=10000, idx=np.where(~mask)[0] + )[0] if isinstance(dsp_config, str): dsp_config = Props.read_from(dsp_config) @@ -230,7 +247,9 @@ def get_out_data( n_rows_to_read_i = bisect_left(peak_dict["idxs"][0], n_rows_i) # now split idx into idx_i and the remainder idx_i = (peak_dict["idxs"][0][:n_rows_to_read_i],) - peak_dict["idxs"] = (peak_dict["idxs"][0][n_rows_to_read_i:] - n_rows_i,) + peak_dict["idxs"] = ( + peak_dict["idxs"][0][n_rows_to_read_i:] - n_rows_i, + ) if len(idx_i[0]) > 0: peak_dict["obj_buf"], n_rows_read_i = sto.read( lh5_path, @@ -246,12 +265,17 @@ def get_out_data( peak_dict["obj_buf_start"] += n_rows_read_i if peak_dict["n_rows_read"] >= 10000 or file == raw_files[-1]: if "e_lower_lim" not in peak_dict: - tb_out = run_one_dsp(peak_dict["obj_buf"], dsp_config, db_dict=db_dict) + tb_out = run_one_dsp( + peak_dict["obj_buf"], dsp_config, db_dict=db_dict + ) energy = tb_out[energy_parameter].nda init_bin_width = ( 2 - * (np.nanpercentile(energy, 75) - np.nanpercentile(energy, 25)) + * ( + np.nanpercentile(energy, 75) + - np.nanpercentile(energy, 25) + ) * len(energy) ** (-1 / 3) ) @@ 
-285,10 +309,14 @@ def get_out_data( log.debug("Fit failed, using max guess") rough_adc_to_kev = peak / peak_loc e_lower_lim = ( - peak_loc - (1.5 * peak_dict["kev_width"][0]) / rough_adc_to_kev + peak_loc + - (1.5 * peak_dict["kev_width"][0]) + / rough_adc_to_kev ) e_upper_lim = ( - peak_loc + (1.5 * peak_dict["kev_width"][1]) / rough_adc_to_kev + peak_loc + + (1.5 * peak_dict["kev_width"][1]) + / rough_adc_to_kev ) hist, bins, var = pgh.get_hist( energy, @@ -298,8 +326,12 @@ def get_out_data( mu = pgh.get_bin_centers(bins)[np.nanargmax(hist)] updated_adc_to_kev = peak / mu - e_lower_lim = mu - (peak_dict["kev_width"][0]) / updated_adc_to_kev - e_upper_lim = mu + (peak_dict["kev_width"][1]) / updated_adc_to_kev + e_lower_lim = ( + mu - (peak_dict["kev_width"][0]) / updated_adc_to_kev + ) + e_upper_lim = ( + mu + (peak_dict["kev_width"][1]) / updated_adc_to_kev + ) log.info( f"{peak}: lower lim is :{e_lower_lim}, upper lim is {e_upper_lim}" ) @@ -319,13 +351,23 @@ def get_out_data( final_cut_field=final_cut_field, energy_param=energy_parameter, ) - sto.write(out_tbl, name=lh5_path, lh5_file=temp_output, wo_mode="a") + sto.write( + out_tbl, + name=lh5_path, + lh5_file=temp_output, + wo_mode="a", + ) peak_dict["obj_buf"] = None peak_dict["obj_buf_start"] = 0 peak_dict["n_events"] = n_wfs - log.debug(f'found {peak_dict["n_events"]} events for {peak}') + log.debug( + f'found {peak_dict["n_events"]} events for {peak}' + ) else: - if peak_dict["obj_buf"] is not None and len(peak_dict["obj_buf"]) > 0: + if ( + peak_dict["obj_buf"] is not None + and len(peak_dict["obj_buf"]) > 0 + ): tb_out = run_one_dsp( peak_dict["obj_buf"], dsp_config, db_dict=db_dict ) @@ -343,14 +385,21 @@ def get_out_data( ) peak_dict["n_events"] += n_wfs sto.write( - out_tbl, name=lh5_path, lh5_file=temp_output, wo_mode="a" + out_tbl, + name=lh5_path, + lh5_file=temp_output, + wo_mode="a", ) peak_dict["obj_buf"] = None peak_dict["obj_buf_start"] = 0 - log.debug(f'found {peak_dict["n_events"]} events for {peak}') + log.debug( + f'found {peak_dict["n_events"]} events for {peak}' + ) if peak_dict["n_events"] >= n_events: peak_dict["idxs"] = None - log.debug(f"{peak} has reached the required number of events") + log.debug( + f"{peak} has reached the required number of events" + ) else: Path(temp_output).touch() diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py index 9c5d5ff..7e843e8 100644 --- a/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py @@ -60,7 +60,9 @@ energies = sto.read(f"{channel}/raw/daqenergy", raw_files)[0] idxs = np.where(energies.nda == 0)[0] - tb_data = sto.read(f"{channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs)[0] + tb_data = sto.read( + f"{channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs + )[0] t1 = time.time() log.info(f"Time to open raw files {t1-t0:.2f} s, n. 
baselines {len(tb_data)}") @@ -81,7 +83,9 @@ tb_data, dsp_config, db_dict.copy(), opt_dict, channel, display=1 ) else: - out_dict = pno.noise_optimization(raw_files, dsp_config, db_dict.copy(), opt_dict, channel) + out_dict = pno.noise_optimization( + raw_files, dsp_config, db_dict.copy(), opt_dict, channel + ) t2 = time.time() log.info(f"Optimiser finished in {(t2-t0)/60} minutes") diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py index 552dd3e..1ca084b 100644 --- a/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py @@ -28,7 +28,9 @@ argparser.add_argument("--pulser_file", help="pulser file", type=str, required=False) argparser.add_argument("--raw_files", help="input files", nargs="*", type=str) -argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str, required=False) +argparser.add_argument( + "--tcm_files", help="tcm_files", nargs="*", type=str, required=False +) args = argparser.parse_args() sto = lh5.LH5Store() @@ -50,7 +52,10 @@ if kwarg_dict["run_tau"] is True: dsp_config = Props.read_from(channel_dict) kwarg_dict.pop("run_tau") - if isinstance(args.raw_files, list) and args.raw_files[0].split(".")[-1] == "filelist": + if ( + isinstance(args.raw_files, list) + and args.raw_files[0].split(".")[-1] == "filelist" + ): input_file = args.raw_files[0] with Path(input_file).open() as f: input_file = f.read().splitlines() @@ -83,11 +88,16 @@ is_recovering = np.full(len(data), False, dtype=bool) for tstamp in discharge_timestamps: is_recovering = is_recovering | np.where( - (((data["timestamp"] - tstamp) < 0.01) & ((data["timestamp"] - tstamp) > 0)), + ( + ((data["timestamp"] - tstamp) < 0.01) + & ((data["timestamp"] - tstamp) > 0) + ), True, False, ) - cuts = np.where((data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering))[0] + cuts = np.where( + (data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering) + )[0] tb_data = sto.read( f"{channel}/raw", diff --git a/workflow/src/legenddataflow/scripts/pars_hit_aoe.py b/workflow/src/legenddataflow/scripts/pars_hit_aoe.py index 40ea3c3..7e13ed8 100644 --- a/workflow/src/legenddataflow/scripts/pars_hit_aoe.py +++ b/workflow/src/legenddataflow/scripts/pars_hit_aoe.py @@ -98,7 +98,12 @@ def aoe_calibration( aoe.calibrate(data, "AoE_Uncorr") log.info("Calibrated A/E") - return cal_dicts, get_results_dict(aoe), fill_plot_dict(aoe, data, plot_options), aoe + return ( + cal_dicts, + get_results_dict(aoe), + fill_plot_dict(aoe, data, plot_options), + aoe, + ) argparser = argparse.ArgumentParser() @@ -151,7 +156,9 @@ def aoe_calibration( pdf = eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else aoe_peak - sigma_func = eval(kwarg_dict.pop("sigma_func")) if "sigma_func" in kwarg_dict else SigmaFit + sigma_func = ( + eval(kwarg_dict.pop("sigma_func")) if "sigma_func" in kwarg_dict else SigmaFit + ) mean_func = eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else Pol1 diff --git a/workflow/src/legenddataflow/scripts/pars_hit_ecal.py b/workflow/src/legenddataflow/scripts/pars_hit_ecal.py index 9a2f3c5..725fc84 100644 --- a/workflow/src/legenddataflow/scripts/pars_hit_ecal.py +++ b/workflow/src/legenddataflow/scripts/pars_hit_ecal.py @@ -48,7 +48,9 @@ def plot_2614_timemap( plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize - selection = data.query(f"{cal_energy_param}>2560&{cal_energy_param}<2660&{selection_string}") + 
selection = data.query( + f"{cal_energy_param}>2560&{cal_energy_param}<2660&{selection_string}" + ) fig = plt.figure() if len(selection) == 0: @@ -68,7 +70,9 @@ def plot_2614_timemap( ) ticks, labels = plt.xticks() - plt.xlabel(f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}") + plt.xlabel( + f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}" + ) plt.ylabel("Energy(keV)") plt.ylim([erange[0], erange[1]]) @@ -120,7 +124,9 @@ def plot_pulser_timemap( ) plt.ylim([mean - n_spread * spread, mean + n_spread * spread]) ticks, labels = plt.xticks() - plt.xlabel(f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}") + plt.xlabel( + f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}" + ) plt.ylabel("Energy(keV)") plt.xticks( @@ -298,7 +304,9 @@ def plot_baseline_timemap( ) ticks, labels = plt.xticks() - plt.xlabel(f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}") + plt.xlabel( + f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}" + ) plt.ylabel("Baseline Value") plt.ylim([mean - n_spread * spread, mean + n_spread * spread]) @@ -351,7 +359,9 @@ def baseline_tracking_plots(files, lh5_path, plot_options=None): if plot_options is None: plot_options = {} plot_dict = {} - data = lh5.read_as(lh5_path, files, "pd", field_mask=["bl_mean", "baseline", "timestamp"]) + data = lh5.read_as( + lh5_path, files, "pd", field_mask=["bl_mean", "baseline", "timestamp"] + ) for key, item in plot_options.items(): if item["options"] is not None: plot_dict[key] = item["function"](data, **item["options"]) @@ -402,13 +412,21 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): dic.pop("covariance") return { - "total_fep": len(data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624")), - "total_dep": len(data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597")), + "total_fep": len( + data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624") + ), + "total_dep": len( + data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597") + ), "pass_fep": len( - data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624&{selection_string}") + data.query( + f"{cal_energy_param}>2604&{cal_energy_param}<2624&{selection_string}" + ) ), "pass_dep": len( - data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597&{selection_string}") + data.query( + f"{cal_energy_param}>1587&{cal_energy_param}<1597&{selection_string}" + ) ), "eres_linear": fwhm_linear, "eres_quadratic": fwhm_quad, @@ -422,8 +440,12 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): if __name__ == "__main__": argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="filelist", nargs="*", type=str) - argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) - argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) + argparser.add_argument( + "--tcm_filelist", help="tcm_filelist", type=str, required=False + ) + argparser.add_argument( + "--pulser_file", help="pulser_file", type=str, required=False + ) argparser.add_argument("--ctc_dict", help="ctc_dict", nargs="*") argparser.add_argument("--in_hit_dict", help="in_hit_dict", required=False) @@ -498,7 +520,12 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): files, f"{channel}/dsp", hit_dict, - params=[*kwarg_dict["energy_params"], kwarg_dict["cut_param"], 
"timestamp", "trapTmax"], + params=[ + *kwarg_dict["energy_params"], + kwarg_dict["cut_param"], + "timestamp", + "trapTmax", + ], threshold=kwarg_dict["threshold"], return_selection_mask=True, cal_energy_param="trapTmax", @@ -535,7 +562,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): glines = [pk_par[0] for pk_par in pk_pars] if "cal_energy_params" not in kwarg_dict: - cal_energy_params = [energy_param + "_cal" for energy_param in kwarg_dict["energy_params"]] + cal_energy_params = [ + energy_param + "_cal" for energy_param in kwarg_dict["energy_params"] + ] else: cal_energy_params = kwarg_dict["cal_energy_params"] @@ -545,7 +574,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): plot_dict = {} full_object_dict = {} - for energy_param, cal_energy_param in zip(kwarg_dict["energy_params"], cal_energy_params): + for energy_param, cal_energy_param in zip( + kwarg_dict["energy_params"], cal_energy_params + ): e_uncal = data.query(selection_string)[energy_param].to_numpy() hist, bins, bar = pgh.get_hist( @@ -625,7 +656,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): full_object_dict[cal_energy_param], data, cal_energy_param, selection_string ) - hit_dict.update({cal_energy_param: full_object_dict[cal_energy_param].gen_pars_dict()}) + hit_dict.update( + {cal_energy_param: full_object_dict[cal_energy_param].gen_pars_dict()} + ) if "ctc" in cal_energy_param: no_ctc_dict = full_object_dict[cal_energy_param].gen_pars_dict() no_ctc_dict["expression"] = no_ctc_dict["expression"].replace("_ctc", "") @@ -641,15 +674,15 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): if args.plot_path: param_plot_dict = {} if ~np.isnan(full_object_dict[cal_energy_param].pars).all(): - param_plot_dict["fwhm_fit"] = full_object_dict[cal_energy_param].plot_eres_fit( - e_uncal - ) - param_plot_dict["cal_fit"] = full_object_dict[cal_energy_param].plot_cal_fit( - e_uncal - ) - param_plot_dict["peak_fits"] = full_object_dict[cal_energy_param].plot_fits( - e_uncal - ) + param_plot_dict["fwhm_fit"] = full_object_dict[ + cal_energy_param + ].plot_eres_fit(e_uncal) + param_plot_dict["cal_fit"] = full_object_dict[ + cal_energy_param + ].plot_cal_fit(e_uncal) + param_plot_dict["peak_fits"] = full_object_dict[ + cal_energy_param + ].plot_fits(e_uncal) if "plot_options" in kwarg_dict: for key, item in kwarg_dict["plot_options"].items(): @@ -738,7 +771,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): pkl.dump(total_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) # save output dictionary - output_dict = convert_dict_np_to_float({"pars": hit_dict, "results": {"ecal": results_dict}}) + output_dict = convert_dict_np_to_float( + {"pars": hit_dict, "results": {"ecal": results_dict}} + ) Props.write_to(args.save_path, output_dict) # save calibration objects diff --git a/workflow/src/legenddataflow/scripts/pars_hit_qc.py b/workflow/src/legenddataflow/scripts/pars_hit_qc.py index c83dff7..5e6a378 100644 --- a/workflow/src/legenddataflow/scripts/pars_hit_qc.py +++ b/workflow/src/legenddataflow/scripts/pars_hit_qc.py @@ -33,8 +33,12 @@ argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) - argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) - argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) + argparser.add_argument( + 
"--tcm_filelist", help="tcm_filelist", type=str, required=False + ) + argparser.add_argument( + "--pulser_file", help="pulser_file", type=str, required=False + ) argparser.add_argument( "--overwrite_files", help="overwrite_files", @@ -93,7 +97,10 @@ kwarg_dict_fft = kwarg_dict["fft_fields"] if len(fft_files) > 0: fft_fields = get_keys( - [key.replace(f"{channel}/dsp/", "") for key in ls(fft_files[0], f"{channel}/dsp/")], + [ + key.replace(f"{channel}/dsp/", "") + for key in ls(fft_files[0], f"{channel}/dsp/") + ], kwarg_dict_fft["cut_parameters"], ) @@ -132,15 +139,21 @@ hit_dict_fft.update(cut_dict) plot_dict_fft.update(cut_plots) - log.debug(f"{name} calculated cut_dict is: {json.dumps(cut_dict, indent=2)}") + log.debug( + f"{name} calculated cut_dict is: {json.dumps(cut_dict, indent=2)}" + ) ct_mask = np.full(len(cut_data), True, dtype=bool) for outname, info in cut_dict.items(): # convert to pandas eval exp = info["expression"] for key in info.get("parameters", None): - exp = re.sub(f"(? 0)), + ( + ((data["timestamp"] - tstamp) < 0.01) + & ((data["timestamp"] - tstamp) > 0) + ), True, False, ) @@ -213,7 +235,9 @@ rng = np.random.default_rng() mask = np.full(len(data.query("~is_pulser & ~is_recovering")), False, dtype=bool) - mask[rng.choice(len(data.query("~is_pulser & ~is_recovering")), 4000, replace=False)] = True + mask[ + rng.choice(len(data.query("~is_pulser & ~is_recovering")), 4000, replace=False) + ] = True if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] diff --git a/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py b/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py index 4ad0980..8aad849 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py @@ -57,7 +57,9 @@ def get_results_dict(aoe_class): "high_cut": aoe_class.high_cut_val, "low_side_sfs": aoe_class.low_side_sfs.to_dict("index"), "2_side_sfs": aoe_class.two_side_sfs.to_dict("index"), - "low_side_sfs_by_run": aoe_class.low_side_sfs_by_run[tstamp].to_dict("index"), + "low_side_sfs_by_run": aoe_class.low_side_sfs_by_run[tstamp].to_dict( + "index" + ), "2_side_sfs_by_run": aoe_class.two_side_sfs_by_run[tstamp].to_dict("index"), } return result_dict @@ -123,7 +125,12 @@ def aoe_calibration( ) aoe.calibrate(data, "AoE_Uncorr") log.info("Calibrated A/E") - return cal_dicts, get_results_dict(aoe), fill_plot_dict(aoe, data, plot_options), aoe + return ( + cal_dicts, + get_results_dict(aoe), + fill_plot_dict(aoe, data, plot_options), + aoe, + ) def run_aoe_calibration( @@ -139,14 +146,13 @@ def run_aoe_calibration( # gen_plots=True, ): configs = LegendMetadata(path=configs) - channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"]["pars_pht_aoecal"][ - "inputs" - ]["par_pht_aoecal_config"][channel] + channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"][ + "pars_pht_aoecal" + ]["inputs"]["par_pht_aoecal_config"][channel] kwarg_dict = Props.read_from(channel_dict) if kwarg_dict.pop("run_aoe") is True: - kwarg_dict.pop("pulser_multiplicity_threshold") kwarg_dict.pop("threshold") if "plot_options" in kwarg_dict: @@ -155,9 +161,15 @@ def run_aoe_calibration( pdf = eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else aoe_peak - mean_func = eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else Pol1 + mean_func = ( + eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else Pol1 + ) - sigma_func = eval(kwarg_dict.pop("sigma_func")) if "sigma_func" in kwarg_dict 
else SigmaFit + sigma_func = ( + eval(kwarg_dict.pop("sigma_func")) + if "sigma_func" in kwarg_dict + else SigmaFit + ) if "dt_cut" in kwarg_dict and kwarg_dict["dt_cut"] is not None: cut_dict = kwarg_dict["dt_cut"]["cut"] @@ -212,9 +224,9 @@ def eres_func(x): aoe_obj.pdf = aoe_obj.pdf.name # need to change eres func as can't pickle lambdas try: - aoe_obj.eres_func = results_dicts[next(iter(results_dicts))]["partition_ecal"][ - kwarg_dict["cal_energy_param"] - ]["eres_linear"] + aoe_obj.eres_func = results_dicts[next(iter(results_dicts))][ + "partition_ecal" + ][kwarg_dict["cal_energy_param"]]["eres_linear"] except KeyError: aoe_obj.eres_func = {} else: @@ -230,7 +242,9 @@ def eres_func(x): for tstamp, object_dict in object_dicts.items(): out_object_dicts[tstamp] = dict(**object_dict, aoe=aoe_obj) - common_dict = aoe_plot_dict.pop("common") if "common" in list(aoe_plot_dict) else None + common_dict = ( + aoe_plot_dict.pop("common") if "common" in list(aoe_plot_dict) else None + ) out_plot_dicts = {} for tstamp, plot_dict in plot_dicts.items(): if "common" in list(plot_dict) and common_dict is not None: @@ -244,18 +258,25 @@ def eres_func(x): if __name__ == "__main__": - argparser = argparse.ArgumentParser() - argparser.add_argument("--input_files", help="files", type=str, nargs="*", required=True) + argparser.add_argument( + "--input_files", help="files", type=str, nargs="*", required=True + ) argparser.add_argument( "--pulser_files", help="pulser_file", nargs="*", type=str, required=False ) argparser.add_argument( "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False ) - argparser.add_argument("--ecal_file", help="ecal_file", type=str, nargs="*", required=True) - argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) + argparser.add_argument( + "--ecal_file", help="ecal_file", type=str, nargs="*", required=True + ) + argparser.add_argument( + "--eres_file", help="eres_file", type=str, nargs="*", required=True + ) + argparser.add_argument( + "--inplots", help="eres_file", type=str, nargs="*", required=True + ) argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata", type=str) @@ -265,7 +286,9 @@ def eres_func(x): argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) + argparser.add_argument( + "--plot_file", help="plot_file", type=str, nargs="*", required=False + ) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--aoe_results", help="aoe_results", nargs="*", type=str) @@ -383,7 +406,10 @@ def eres_func(x): for tstamp in cal_dict: if tstamp not in np.unique(data["run_timestamp"]): - row = {key: [False] if data.dtypes[key] == "bool" else [np.nan] for key in data} + row = { + key: [False] if data.dtypes[key] == "bool" else [np.nan] + for key in data + } row["run_timestamp"] = tstamp row = pd.DataFrame(row) data = pd.concat([data, row]) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_fast.py b/workflow/src/legenddataflow/scripts/pars_pht_fast.py index a807fa6..6dda1b7 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_fast.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_fast.py @@ -44,16 +44,24 @@ def 
run_splitter(files): if __name__ == "__main__": argparser = argparse.ArgumentParser() - argparser.add_argument("--input_files", help="files", type=str, nargs="*", required=True) + argparser.add_argument( + "--input_files", help="files", type=str, nargs="*", required=True + ) argparser.add_argument( "--pulser_files", help="pulser_file", nargs="*", type=str, required=False ) argparser.add_argument( "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False ) - argparser.add_argument("--ecal_file", help="ecal_file", type=str, nargs="*", required=True) - argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) + argparser.add_argument( + "--ecal_file", help="ecal_file", type=str, nargs="*", required=True + ) + argparser.add_argument( + "--eres_file", help="eres_file", type=str, nargs="*", required=True + ) + argparser.add_argument( + "--inplots", help="eres_file", type=str, nargs="*", required=True + ) argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -63,7 +71,9 @@ def run_splitter(files): argparser.add_argument("--metadata", help="metadata path", type=str, required=True) argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) + argparser.add_argument( + "--plot_file", help="plot_file", type=str, nargs="*", required=False + ) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) @@ -121,7 +131,9 @@ def run_splitter(files): final_dict[timestamp] = sorted(filelist) kwarg_dict = Props.read_from( - config_dict["pars_pht_partcal"]["inputs"]["pars_pht_partcal_config"][args.channel] + config_dict["pars_pht_partcal"]["inputs"]["pars_pht_partcal_config"][ + args.channel + ] ) aoe_kwarg_dict = Props.read_from( config_dict["pars_pht_aoecal"]["inputs"]["par_pht_aoecal_config"][args.channel] @@ -200,7 +212,9 @@ def run_splitter(files): for tstamp in cal_dict: if tstamp not in np.unique(data["run_timestamp"]): - row = {key: [False] if data.dtypes[key] == "bool" else [np.nan] for key in data} + row = { + key: [False] if data.dtypes[key] == "bool" else [np.nan] for key in data + } row["run_timestamp"] = tstamp row = pd.DataFrame(row) data = pd.concat([data, row]) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_lqcal.py b/workflow/src/legenddataflow/scripts/pars_pht_lqcal.py index a6a231a..78c8c6e 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_lqcal.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_lqcal.py @@ -153,14 +153,13 @@ def run_lq_calibration( # gen_plots=True, ): configs = LegendMetadata(path=configs) - channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"]["pars_pht_lqcal"][ - "inputs" - ]["lqcal_config"][channel] + channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"][ + "pars_pht_lqcal" + ]["inputs"]["lqcal_config"][channel] kwarg_dict = Props.read_from(channel_dict) if kwarg_dict.pop("run_lq") is True: - if "plot_options" in kwarg_dict: for field, item in kwarg_dict["plot_options"].items(): kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) @@ -208,9 +207,9 @@ def eres_func(x): ) # need to change eres func as can't pickle lambdas try: - lq_obj.eres_func = 
results_dicts[next(iter(results_dicts))]["partition_ecal"][ - kwarg_dict["cal_energy_param"] - ]["eres_linear"] + lq_obj.eres_func = results_dicts[next(iter(results_dicts))][ + "partition_ecal" + ][kwarg_dict["cal_energy_param"]]["eres_linear"] except KeyError: lq_obj.eres_func = {} else: @@ -241,16 +240,24 @@ def eres_func(x): if __name__ == "__main__": argparser = argparse.ArgumentParser() - argparser.add_argument("--input_files", help="files", type=str, nargs="*", required=True) + argparser.add_argument( + "--input_files", help="files", type=str, nargs="*", required=True + ) argparser.add_argument( "--pulser_files", help="pulser_file", type=str, nargs="*", required=False ) argparser.add_argument( "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False ) - argparser.add_argument("--ecal_file", help="ecal_file", type=str, nargs="*", required=True) - argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) + argparser.add_argument( + "--ecal_file", help="ecal_file", type=str, nargs="*", required=True + ) + argparser.add_argument( + "--eres_file", help="eres_file", type=str, nargs="*", required=True + ) + argparser.add_argument( + "--inplots", help="eres_file", type=str, nargs="*", required=True + ) argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) @@ -260,7 +267,9 @@ def eres_func(x): argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) + argparser.add_argument( + "--plot_file", help="plot_file", type=str, nargs="*", required=False + ) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--lq_results", help="lq_results", nargs="*", type=str) @@ -370,7 +379,10 @@ def eres_func(x): for tstamp in cal_dict: if tstamp not in np.unique(data["run_timestamp"]): - row = {key: [False] if data.dtypes[key] == "bool" else [np.nan] for key in data} + row = { + key: [False] if data.dtypes[key] == "bool" else [np.nan] + for key in data + } row["run_timestamp"] = tstamp row = pd.DataFrame(row) data = pd.concat([data, row]) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_partcal.py b/workflow/src/legenddataflow/scripts/pars_pht_partcal.py index 7bd8f66..bd2d93f 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_partcal.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_partcal.py @@ -113,13 +113,21 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): dic.pop("covariance") out_dict = { - "total_fep": len(data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624")), - "total_dep": len(data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597")), + "total_fep": len( + data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624") + ), + "total_dep": len( + data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597") + ), "pass_fep": len( - data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624&{selection_string}") + data.query( + f"{cal_energy_param}>2604&{cal_energy_param}<2624&{selection_string}" + ) ), "pass_dep": len( - data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597&{selection_string}") + data.query( + 
f"{cal_energy_param}>1587&{cal_energy_param}<1597&{selection_string}" + ) ), "eres_linear": fwhm_linear, "eres_quadratic": fwhm_quad, @@ -128,7 +136,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): "peak_param": results_dict["peak_param"], } if "calibration_parameters" in results_dict: - out_dict["calibration_parameters"] = results_dict["calibration_parameters"].to_dict() + out_dict["calibration_parameters"] = results_dict[ + "calibration_parameters" + ].to_dict() out_dict["calibration_uncertainty"] = results_dict[ "calibration_uncertainties" ].to_dict() @@ -149,13 +159,12 @@ def calibrate_partition( datatype, gen_plots=True, ): - det_status = chmap[channel]["analysis"]["usability"] configs = LegendMetadata(path=configs) - channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"]["pars_pht_partcal"][ - "inputs" - ]["pars_pht_partcal_config"][channel] + channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"][ + "pars_pht_partcal" + ]["inputs"]["pars_pht_partcal_config"][channel] kwarg_dict = Props.read_from(channel_dict) @@ -201,7 +210,9 @@ def calibrate_partition( glines = [pk_par[0] for pk_par in pk_pars] if "cal_energy_params" not in kwarg_dict: - cal_energy_params = [energy_param + "_cal" for energy_param in kwarg_dict["energy_params"]] + cal_energy_params = [ + energy_param + "_cal" for energy_param in kwarg_dict["energy_params"] + ] else: cal_energy_params = kwarg_dict["cal_energy_params"] @@ -211,14 +222,17 @@ def calibrate_partition( partcal_plot_dict = {} full_object_dict = {} - for energy_param, cal_energy_param in zip(kwarg_dict["energy_params"], cal_energy_params): + for energy_param, cal_energy_param in zip( + kwarg_dict["energy_params"], cal_energy_params + ): energy = data.query(selection_string)[energy_param].to_numpy() full_object_dict[cal_energy_param] = HPGeCalibration( energy_param, glines, 1, kwarg_dict.get("deg", 0), - debug_mode=kwarg_dict.get("debug_mode", False) | args.debug, # , fixed={1: 1} + debug_mode=kwarg_dict.get("debug_mode", False) + | args.debug, # , fixed={1: 1} ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( energy, @@ -249,9 +263,9 @@ def calibrate_partition( if csqr[0] / csqr[1] < 100: allowed_p_val = ( 0.9 - * full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks"][ - "peak_parameters" - ][2614.511]["p_value"] + * full_object_dict[cal_energy_param].results[ + "hpge_fit_energy_peaks" + ]["peak_parameters"][2614.511]["p_value"] ) full_object_dict[cal_energy_param] = HPGeCalibration( @@ -296,11 +310,14 @@ def calibrate_partition( full_object_dict[cal_energy_param], data, cal_energy_param, selection_string ) cal_dicts = update_cal_dicts( - cal_dicts, {cal_energy_param: full_object_dict[cal_energy_param].gen_pars_dict()} + cal_dicts, + {cal_energy_param: full_object_dict[cal_energy_param].gen_pars_dict()}, ) if "ctc" in cal_energy_param: no_ctc_dict = full_object_dict[cal_energy_param].gen_pars_dict() - no_ctc_dict["expression"] = no_ctc_dict["expression"].replace("ctc", "noctc") + no_ctc_dict["expression"] = no_ctc_dict["expression"].replace( + "ctc", "noctc" + ) cal_dicts = update_cal_dicts( cal_dicts, {cal_energy_param.replace("ctc", "noctc"): no_ctc_dict} @@ -318,42 +335,42 @@ def calibrate_partition( if gen_plots is True: param_plot_dict = {} if ~np.isnan(full_object_dict[cal_energy_param].pars).all(): - param_plot_dict["fwhm_fit"] = full_object_dict[cal_energy_param].plot_eres_fit( - energy - ) - param_plot_dict["cal_fit"] = 
full_object_dict[cal_energy_param].plot_cal_fit( - energy - ) + param_plot_dict["fwhm_fit"] = full_object_dict[ + cal_energy_param + ].plot_eres_fit(energy) + param_plot_dict["cal_fit"] = full_object_dict[ + cal_energy_param + ].plot_cal_fit(energy) if det_status == "on": param_plot_dict["cal_fit_with_errors"] = full_object_dict[ cal_energy_param ].plot_cal_fit_with_errors(energy) if ( len( - full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks"][ - "peak_parameters" - ] + full_object_dict[cal_energy_param].results[ + "hpge_fit_energy_peaks" + ]["peak_parameters"] ) < 17 ): - param_plot_dict["peak_fits"] = full_object_dict[cal_energy_param].plot_fits( - energy, ncols=4, nrows=4 - ) + param_plot_dict["peak_fits"] = full_object_dict[ + cal_energy_param + ].plot_fits(energy, ncols=4, nrows=4) elif ( len( - full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks"][ - "peak_parameters" - ] + full_object_dict[cal_energy_param].results[ + "hpge_fit_energy_peaks" + ]["peak_parameters"] ) < 26 ): - param_plot_dict["peak_fits"] = full_object_dict[cal_energy_param].plot_fits( - energy, ncols=5, nrows=5 - ) + param_plot_dict["peak_fits"] = full_object_dict[ + cal_energy_param + ].plot_fits(energy, ncols=5, nrows=5) else: - param_plot_dict["peak_fits"] = full_object_dict[cal_energy_param].plot_fits( - energy, ncols=6, nrows=5 - ) + param_plot_dict["peak_fits"] = full_object_dict[ + cal_energy_param + ].plot_fits(energy, ncols=6, nrows=5) if "plot_options" in kwarg_dict: for key, item in kwarg_dict["plot_options"].items(): @@ -389,7 +406,9 @@ def calibrate_partition( for tstamp, object_dict in object_dicts.items(): out_object_dicts[tstamp] = dict(**object_dict, partition_ecal=full_object_dict) - common_dict = partcal_plot_dict.pop("common") if "common" in list(partcal_plot_dict) else None + common_dict = ( + partcal_plot_dict.pop("common") if "common" in list(partcal_plot_dict) else None + ) out_plot_dicts = {} for tstamp, plot_dict in plot_dicts.items(): if "common" in list(plot_dict) and common_dict is not None: @@ -404,16 +423,24 @@ def calibrate_partition( if __name__ == "__main__": argparser = argparse.ArgumentParser() - argparser.add_argument("--input_files", help="files", type=str, nargs="*", required=True) + argparser.add_argument( + "--input_files", help="files", type=str, nargs="*", required=True + ) argparser.add_argument( "--pulser_files", help="pulser_file", nargs="*", type=str, required=False ) argparser.add_argument( "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False ) - argparser.add_argument("--ecal_file", help="ecal_file", type=str, nargs="*", required=True) - argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) + argparser.add_argument( + "--ecal_file", help="ecal_file", type=str, nargs="*", required=True + ) + argparser.add_argument( + "--eres_file", help="eres_file", type=str, nargs="*", required=True + ) + argparser.add_argument( + "--inplots", help="eres_file", type=str, nargs="*", required=True + ) argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -423,7 +450,9 @@ def calibrate_partition( argparser.add_argument("--metadata", help="metadata path", type=str, required=True) argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, 
nargs="*", required=False) + argparser.add_argument( + "--plot_file", help="plot_file", type=str, nargs="*", required=False + ) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) @@ -525,7 +554,9 @@ def calibrate_partition( for tstamp in cal_dict: if tstamp not in np.unique(data["run_timestamp"]): - row = {key: [False] if data.dtypes[key] == "bool" else [np.nan] for key in data} + row = { + key: [False] if data.dtypes[key] == "bool" else [np.nan] for key in data + } row["run_timestamp"] = tstamp row = pd.DataFrame(row) data = pd.concat([data, row]) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_qc.py b/workflow/src/legenddataflow/scripts/pars_pht_qc.py index e1cf4dd..feee4e5 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_qc.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_qc.py @@ -51,7 +51,9 @@ argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) + argparser.add_argument( + "--plot_path", help="plot_path", type=str, nargs="*", required=False + ) argparser.add_argument( "--save_path", help="save_path", @@ -156,15 +158,21 @@ hit_dict_fft.update(cut_dict) plot_dict_fft.update(cut_plots) - log.debug(f"{name} calculated cut_dict is: {json.dumps(cut_dict, indent=2)}") + log.debug( + f"{name} calculated cut_dict is: {json.dumps(cut_dict, indent=2)}" + ) ct_mask = np.full(len(cut_data), True, dtype=bool) for outname, info in cut_dict.items(): # convert to pandas eval exp = info["expression"] for key in info.get("parameters", None): - exp = re.sub(f"(? 
0)), + ( + ((data["timestamp"] - tstamp) <= 0.01) + & ((data["timestamp"] - tstamp) > 0) + ), True, False, ) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py b/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py index b48211f..71167df 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py @@ -39,7 +39,9 @@ argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) + argparser.add_argument( + "--plot_path", help="plot_path", type=str, nargs="*", required=False + ) argparser.add_argument( "--save_path", help="save_path", @@ -75,7 +77,9 @@ else: run_files = sorted(np.unique(run_files)) phy_files += run_files - bls = sto.read("ch1027200/dsp/", run_files, field_mask=["wf_max", "bl_mean"])[0] + bls = sto.read( + "ch1027200/dsp/", run_files, field_mask=["wf_max", "bl_mean"] + )[0] puls = sto.read("ch1027201/dsp/", run_files, field_mask=["trapTmax"])[0] bl_idxs = ((bls["wf_max"].nda - bls["bl_mean"].nda) > 1000) & ( puls["trapTmax"].nda < 200 @@ -87,12 +91,17 @@ phy_files = sorted(np.unique(phy_files)) bls = sto.read("ch1027200/dsp/", phy_files, field_mask=["wf_max", "bl_mean"])[0] puls = sto.read("ch1027201/dsp/", phy_files, field_mask=["trapTmax"])[0] - bl_mask = ((bls["wf_max"].nda - bls["bl_mean"].nda) > 1000) & (puls["trapTmax"].nda < 200) + bl_mask = ((bls["wf_max"].nda - bls["bl_mean"].nda) > 1000) & ( + puls["trapTmax"].nda < 200 + ) kwarg_dict_fft = kwarg_dict["fft_fields"] cut_fields = get_keys( - [key.replace(f"{channel}/dsp/", "") for key in ls(phy_files[0], f"{channel}/dsp/")], + [ + key.replace(f"{channel}/dsp/", "") + for key in ls(phy_files[0], f"{channel}/dsp/") + ], kwarg_dict_fft["cut_parameters"], ) @@ -108,7 +117,10 @@ is_recovering = np.full(len(data), False, dtype=bool) for tstamp in discharge_timestamps: is_recovering = is_recovering | np.where( - (((data["timestamp"] - tstamp) < 0.01) & ((data["timestamp"] - tstamp) > 0)), + ( + ((data["timestamp"] - tstamp) < 0.01) + & ((data["timestamp"] - tstamp) > 0) + ), True, False, ) @@ -138,7 +150,9 @@ exp = info["expression"] for key in info.get("parameters", None): exp = re.sub(f"(? 
Date: Thu, 30 Jan 2025 17:10:15 +0100 Subject: [PATCH 073/101] fix a couple of problems with tests and setup GHA --- .github/dependabot.yml | 7 +++ .github/workflows/main.yml | 59 ++++++++++++++++++++++++ tests/test_util.py | 94 ++++++++++++-------------------------- 3 files changed, 94 insertions(+), 66 deletions(-) create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/main.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..f9ecf57 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,7 @@ +version: 2 +updates: + # Maintain dependencies for GitHub Actions + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..974d240 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,59 @@ +name: lgdo + +on: + workflow_dispatch: + pull_request: + push: + branches: + - main + - "releases/**" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + FORCE_COLOR: 3 + +jobs: + build-and-test: + name: Test lgdo with Python + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.11", "3.12"] + os: [ubuntu-latest, macos-13] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Get dependencies and install legend-dataflow + run: | + python -m pip install --upgrade pip wheel setuptools + python -m pip install --upgrade .[test] + - name: Run unit tests + run: | + python -m pytest + + test-coverage: + name: Calculate and upload test coverage + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 2 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Generate Report + run: | + python -m pip install --upgrade pip wheel setuptools + python -m pip install --upgrade .[test] + python -m pytest --cov=legenddataflow --cov-report=xml + - name: Upload Coverage to codecov.io + uses: codecov/codecov-action@v5 diff --git a/tests/test_util.py b/tests/test_util.py index c1654e0..38d8910 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,33 +1,25 @@ import json from pathlib import Path -from scripts.library import ( - CalibCatalog, +from legenddataflow import ( FileKey, - pars_catalog, - pars_key_resolve, + ParsKeyResolve, + patterns, subst_vars, - unix_time, -) -from scripts.library.patterns import get_pattern_tier_daq, get_pattern_tier_dsp -from scripts.library.utils import ( - par_dsp_path, - par_overwrite_path, - tier_dsp_path, - tier_path, + utils, ) testprod = Path(__file__).parent / "dummy_cycle" -with testprod.open() as r: +with (testprod / "config.json").open() as r: setup = json.load(r) subst_vars(setup, var_values={"_": str(testprod)}) setup = setup["setups"]["test"] def test_util(): - assert tier_path(setup) == str(testprod / "generated/tier") - assert unix_time("20230101T123456Z") == 1672572896.0 + assert utils.tier_path(setup) == str(testprod / "generated/tier") + assert utils.unix_time("20230101T123456Z") == 1672572896.0 def test_filekey(): @@ -44,13 +36,13 @@ def test_filekey(): ) assert key.name == "l200-p00-r000-cal-20230101T123456Z" assert ( - key.get_path_from_filekey(get_pattern_tier_dsp(setup))[0] - == f"{tier_dsp_path(setup)}/cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-tier_dsp.lh5" + 
key.get_path_from_filekey(patterns.get_pattern_tier(setup, "dsp"))[0] + == f"{utils.get_tier_path(setup, 'dsp')}/cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-tier_dsp.lh5" ) assert ( FileKey.get_filekey_from_pattern( - key.get_path_from_filekey(get_pattern_tier_dsp(setup))[0], - get_pattern_tier_dsp(setup), + key.get_path_from_filekey(patterns.get_pattern_tier(setup, "dsp"))[0], + utils.get_pattern_tier(setup, "dsp"), ).name == key.name ) @@ -59,29 +51,29 @@ def test_filekey(): def test_create_pars_keylist(): key1 = FileKey("l200", "p00", "r000", "cal", "20230101T123456Z") assert ( - pars_key_resolve.from_filekey(key1, {"cal": ["par_dsp"]}).valid_from + ParsKeyResolve.from_filekey(key1, {"cal": ["par_dsp"]}).valid_from == "20230101T123456Z" ) key2 = FileKey("l200", "p00", "r000", "cal", "20230102T123456Z") - assert pars_key_resolve.match_keys(key1, key2) == key1 + assert ParsKeyResolve.match_keys(key1, key2) == key1 key3 = FileKey("l200", "p00", "r000", "cal", "20230101T000000Z") - assert pars_key_resolve.match_keys(key1, key3) == key3 - assert pars_key_resolve.generate_par_keylist([key1, key2, key3]) == [key3] - pkey1 = pars_key_resolve.from_filekey(key1, {"cal": ["par_dsp"]}) - pkey2 = pars_key_resolve.from_filekey( - FileKey("l200", "p00", "r000", "lar", "20230102T123456Z"), {"lar": ["par_dsp"]} + assert ParsKeyResolve.match_keys(key1, key3) == key3 + assert ParsKeyResolve.generate_par_keylist([key1, key2, key3]) == [key3] + pkey1 = ParsKeyResolve.from_filekey(key1, {"cal": ["par_dsp"]}) + pkey2 = ParsKeyResolve.from_filekey( + FileKey("l200", "p00", "r000", "lar", "20230102T123456Z"), {"lar": "par_dsp"} ) assert pkey2.apply == [ - "lar/p00/r000/l200-p00-r000-lar-20230102T123456Z-par_dsp.json" + "lar/p00/r000/l200-p00-r000-lar-20230102T123456Z-par_dsp.yaml" ] - pars_key_resolve.match_entries(pkey1, pkey2) + ParsKeyResolve.match_entries(pkey1, pkey2) assert set(pkey2.apply) == { - "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", - "lar/p00/r000/l200-p00-r000-lar-20230102T123456Z-par_dsp.json", + "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.yaml", + "lar/p00/r000/l200-p00-r000-lar-20230102T123456Z-par_dsp.yaml", } keylist = sorted( - pars_key_resolve.get_keys("-*-*-*-cal", get_pattern_tier_daq(setup)), + ParsKeyResolve.get_keys("-*-*-*-cal", patterns.get_pattern_tier_daq(setup)), key=FileKey.get_unix_timestamp, ) assert keylist == [ @@ -89,7 +81,9 @@ def test_create_pars_keylist(): FileKey("l200", "p00", "r001", "cal", "20230202T004321Z"), ] - keylist += pars_key_resolve.get_keys("-*-*-*-lar", get_pattern_tier_daq(setup)) + keylist += ParsKeyResolve.get_keys( + "-*-*-*-lar", patterns.get_pattern_tier_daq(setup) + ) keylist = sorted(keylist, key=FileKey.get_unix_timestamp) assert keylist == [ FileKey("l200", "p00", "r000", "cal", "20230101T123456Z"), @@ -97,45 +91,13 @@ def test_create_pars_keylist(): FileKey("l200", "p00", "r001", "cal", "20230202T004321Z"), ] - pkeylist = pars_key_resolve.generate_par_keylist(keylist) + pkeylist = ParsKeyResolve.generate_par_keylist(keylist) assert pkeylist == keylist assert set( - pars_key_resolve.match_all_entries( + ParsKeyResolve.match_all_entries( pkeylist, {"cal": ["par_dsp"], "lar": ["par_dsp"]} )[1].apply ) == { "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", "lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.json", } - - -def test_pars_loading(): - pars_files = CalibCatalog.get_calib_files( - Path(par_dsp_path(setup)) / "validity.jsonl", "20230101T123456Z" - ) - assert pars_files == [ - 
"cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json" - ] - - par_override_files = CalibCatalog.get_calib_files( - Path(par_overwrite_path(setup)) / "dsp" / "validity.jsonl", "20230101T123456Z" - ) - - pars_files, pars_files_overwrite = pars_catalog.match_pars_files( - pars_files, par_override_files - ) - - assert pars_files == [ - "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json" - ] - - assert set(pars_catalog.get_par_file(setup, "20230101T123456Z", "dsp")) == { - ( - Path(par_dsp_path(setup)) - / "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", - ), - ( - Path(par_overwrite_path(setup)) - / "dsp/cal/p00/r000/l200-p00-r000-cal-T%-par_dsp_energy-overwrite.json", - ), - } From 385874e061d02238e15208c8a86fa135349e67c1 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 17:11:50 +0100 Subject: [PATCH 074/101] fix docs --- .readthedocs.yaml | 1 - docs/Makefile | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 103c066..d2e9f58 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -17,7 +17,6 @@ build: --module-first --force --output-dir docs/source/api - workflow/scripts workflow/src workflow/rules - .venv/bin/python -m sphinx -T -b html -d docs/_build/doctrees -D diff --git a/docs/Makefile b/docs/Makefile index b85f221..667234c 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -17,8 +17,7 @@ apidoc: clean-apidoc --module-first \ --force \ --output-dir "$(SOURCEDIR)/api" \ - ../workflow/src/legenddataflow \ - ../workflow/scripts \ + ../workflow/src \ ../workflow/rules clean-apidoc: From 902e248d49fe097b545beea6fcfc41757ff16f88 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 17:12:40 +0100 Subject: [PATCH 075/101] fix coverage upload in ci --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 974d240..c080083 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -48,7 +48,7 @@ jobs: fetch-depth: 2 - uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.11" - name: Generate Report run: | From 0dec815c741129846e58288994dcaf9b4fb69718 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 31 Jan 2025 17:30:54 +0100 Subject: [PATCH 076/101] add timestamp to log files and don't remove on completion --- .pre-commit-config.yaml | 1 + workflow/Snakefile | 20 ++++---------------- workflow/Snakefile-build-raw | 3 +++ workflow/rules/ann.smk | 4 ++-- workflow/rules/blinding_calibration.smk | 2 +- workflow/rules/blinding_check.smk | 2 +- workflow/rules/dsp.smk | 2 +- workflow/rules/dsp_pars_geds.smk | 14 +++++++------- workflow/rules/evt.smk | 6 +++--- workflow/rules/hit.smk | 10 +++++----- workflow/rules/pht.smk | 16 ++++++++++------ workflow/rules/pht_fast.smk | 3 ++- workflow/rules/psp.smk | 2 +- workflow/rules/psp_pars_geds.smk | 7 ++++--- workflow/rules/qc_phy.smk | 3 ++- workflow/rules/raw.smk | 6 +++--- workflow/rules/skm.smk | 2 +- workflow/rules/tcm.smk | 4 ++-- workflow/src/legenddataflow/cal_grouping.py | 5 ++++- workflow/src/legenddataflow/patterns.py | 9 ++++++--- 20 files changed, 63 insertions(+), 58 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0611a74..8f713be 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -103,3 +103,4 @@ repos: hooks: - id: snakefmt files: Snakefile*|\.smk + exclude: channel_merge.smk diff --git a/workflow/Snakefile 
b/workflow/Snakefile index 9fa6950..50be710 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -32,10 +32,10 @@ det_status = utils.det_status_path(config) swenv = utils.runcmd(config) basedir = workflow.basedir -# wait for new pylegendmeta release -# if not Path(meta).exists(): -# meta = LegendMetadata() -# meta.checkout(config["configs"]["l200"]["legend_metadata_version"]) +time = datetime.now().strftime("%Y%m%dT%H%M%SZ") + +if not Path(meta).exists(): + LegendMetadata().checkout(config["legend_metadata_version"]) part = CalGrouping(config, Path(det_status) / "cal_groupings.yaml") @@ -146,18 +146,6 @@ onsuccess: if os.path.exists(utils.filelist_path(config)): os.rmdir(utils.filelist_path(config)) - # remove logs - files = glob.glob(os.path.join(utils.tmp_log_path(config), "*", "*.log")) - for file in files: - if os.path.isfile(file): - os.remove(file) - dirs = glob.glob(os.path.join(utils.tmp_log_path(config), "*")) - for d in dirs: - if os.path.isdir(d): - os.rmdir(d) - if os.path.exists(utils.tmp_log_path(config)): - os.rmdir(utils.tmp_log_path(config)) - rule gen_filelist: """Generate file list. diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw index fafd20c..da0d58d 100644 --- a/workflow/Snakefile-build-raw +++ b/workflow/Snakefile-build-raw @@ -10,6 +10,7 @@ import os, sys from pathlib import Path from legenddataflow import patterns as patt from legenddataflow import utils, execenv, ParsKeyResolve +from datetime import datetime utils.subst_vars_in_snakemake_config(workflow, config) @@ -18,6 +19,8 @@ swenv = execenv.execenv_prefix(config) meta_path = utils.metadata_path(config) det_status = utils.det_status_path(config) +time = datetime.now().strftime("%Y%m%dT%H%M%SZ") + if not Path(meta_path).exists(): LegendMetadata(meta_path).checkout(config["legend_metadata_version"]) diff --git a/workflow/rules/ann.smk b/workflow/rules/ann.smk index 2565514..d572b06 100644 --- a/workflow/rules/ann.smk +++ b/workflow/rules/ann.smk @@ -22,7 +22,7 @@ rule build_ann: tier_file=get_pattern_tier(setup, "ann", check_in_cycle=check_in_cycle), db_file=get_pattern_pars_tmp(setup, "ann_db"), log: - get_pattern_log(setup, "tier_ann"), + get_pattern_log(setup, "tier_ann", time), group: "tier-ann" resources: @@ -54,7 +54,7 @@ rule build_pan: tier_file=get_pattern_tier(setup, "pan", check_in_cycle=check_in_cycle), db_file=get_pattern_pars_tmp(setup, "pan_db"), log: - get_pattern_log(setup, "tier_pan"), + get_pattern_log(setup, "tier_pan", time), group: "tier-ann" resources: diff --git a/workflow/rules/blinding_calibration.smk b/workflow/rules/blinding_calibration.smk index b8076d7..537a051 100644 --- a/workflow/rules/blinding_calibration.smk +++ b/workflow/rules/blinding_calibration.smk @@ -31,7 +31,7 @@ rule build_blinding_calibration: par_file=temp(get_pattern_pars_tmp_channel(setup, "raw_blindcal")), plot_file=temp(get_pattern_plts_tmp_channel(setup, "raw_blindcal")), log: - get_pattern_log_channel(setup, "pars_hit_blind_cal"), + get_pattern_log_channel(setup, "pars_hit_blind_cal", time), group: "par-raw-blinding" resources: diff --git a/workflow/rules/blinding_check.smk b/workflow/rules/blinding_check.smk index b142c19..ba552fc 100644 --- a/workflow/rules/blinding_check.smk +++ b/workflow/rules/blinding_check.smk @@ -32,7 +32,7 @@ rule build_blinding_check: check_file=temp(get_pattern_pars_tmp_channel(setup, "raw")), plot_file=temp(get_pattern_plts_tmp_channel(setup, "raw")), log: - get_pattern_log_channel(setup, "pars_hit_blind_check"), + get_pattern_log_channel(setup, 
"pars_hit_blind_check", time), group: "par-hit" resources: diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index 501ed52..d40de3d 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -185,7 +185,7 @@ rule build_dsp: tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), db_file=get_pattern_pars_tmp(setup, "dsp_db"), log: - get_pattern_log(setup, "tier_dsp"), + get_pattern_log(setup, "tier_dsp", time), group: "tier-dsp" resources: diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index f526d6b..8f030cb 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -35,7 +35,7 @@ rule build_pars_dsp_tau_geds: decay_const=temp(get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant")), plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant")), log: - get_pattern_log_channel(setup, "par_dsp_decay_constant"), + get_pattern_log_channel(setup, "par_dsp_decay_constant", time), group: "par-dsp" resources: @@ -69,7 +69,7 @@ rule build_pars_evtsel_geds: output: peak_file=temp(get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5")), log: - get_pattern_log_channel(setup, "par_dsp_event_selection"), + get_pattern_log_channel(setup, "par_dsp_event_selection", time), group: "par-dsp" resources: @@ -108,7 +108,7 @@ rule build_pars_dsp_nopt_geds: ), plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization")), log: - get_pattern_log_channel(setup, "par_dsp_noise_optimization"), + get_pattern_log_channel(setup, "par_dsp_noise_optimization", time), group: "par-dsp" resources: @@ -148,7 +148,7 @@ rule build_pars_dsp_dplms_geds: ), plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "dplms")), log: - get_pattern_log_channel(setup, "pars_dsp_dplms"), + get_pattern_log_channel(setup, "pars_dsp_dplms", time), group: "par-dsp" resources: @@ -187,7 +187,7 @@ rule build_pars_dsp_eopt_geds: ), plots=temp(get_pattern_plts_tmp_channel(setup, "dsp")), log: - get_pattern_log_channel(setup, "pars_dsp_eopt"), + get_pattern_log_channel(setup, "pars_dsp_eopt", time), group: "par-dsp" resources: @@ -217,7 +217,7 @@ rule build_svm_dsp_geds: output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_dsp_svm").replace("{datatype}", "cal"), + get_pattern_log(setup, "pars_dsp_svm").replace("{datatype}", "cal", time), group: "par-dsp-svm" resources: @@ -238,7 +238,7 @@ rule build_pars_dsp_svm_geds: output: dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), log: - get_pattern_log_channel(setup, "pars_dsp_svm"), + get_pattern_log_channel(setup, "pars_dsp_svm", time), group: "par-dsp" resources: diff --git a/workflow/rules/evt.smk b/workflow/rules/evt.smk index d14b8cb..02b7849 100644 --- a/workflow/rules/evt.smk +++ b/workflow/rules/evt.smk @@ -35,7 +35,7 @@ rule build_evt: tier="evt", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, f"tier_evt"), + get_pattern_log(setup, f"tier_evt", time), group: "tier-evt" resources: @@ -88,7 +88,7 @@ rule build_pet: tier="pet", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, f"tier_pet"), + get_pattern_log(setup, f"tier_pet", time), group: "tier-evt" resources: @@ -140,7 +140,7 @@ for evt_tier in ("evt", "pet"): lh5concat_exe=setup["paths"]["install"] + "/bin/lh5concat", ro_input=lambda _, input: utils.as_ro(setup, input), log: - get_pattern_log_concat(setup, f"tier_{evt_tier}_concat"), + get_pattern_log_concat(setup, 
f"tier_{evt_tier}_concat", time), group: "tier-evt" shell: diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index 0af7590..1938779 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -53,7 +53,7 @@ rule build_qc: qc_file=temp(get_pattern_pars_tmp_channel(setup, "hit", "qc")), plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit", "qc")), log: - get_pattern_log_channel(setup, "pars_hit_qc"), + get_pattern_log_channel(setup, "pars_hit_qc", time), group: "par-hit" resources: @@ -102,7 +102,7 @@ rule build_energy_calibration: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit", "energy_cal")), log: - get_pattern_log_channel(setup, "pars_hit_energy_cal"), + get_pattern_log_channel(setup, "pars_hit_energy_cal", time), group: "par-hit" resources: @@ -151,7 +151,7 @@ rule build_aoe_calibration: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit", "aoe_cal")), log: - get_pattern_log_channel(setup, "pars_hit_aoe_cal"), + get_pattern_log_channel(setup, "pars_hit_aoe_cal", time), group: "par-hit" resources: @@ -198,7 +198,7 @@ rule build_lq_calibration: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit")), log: - get_pattern_log_channel(setup, "pars_hit_lq_cal"), + get_pattern_log_channel(setup, "pars_hit_lq_cal", time), group: "par-hit" resources: @@ -325,7 +325,7 @@ rule build_hit: tier="hit", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, "tier_hit"), + get_pattern_log(setup, "tier_hit", time), group: "tier-hit" resources: diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index 27e4f81..937cd27 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -117,6 +117,7 @@ for key, dataset in part.datasets.items(): partition, key, "pht", + time, name="par_pht_qc", ), group: @@ -171,7 +172,7 @@ rule build_pht_qc: hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "qc")), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "qc")), log: - get_pattern_log_channel(setup, "par_pht_qc"), + get_pattern_log_channel(setup, "par_pht_qc", time), group: "par-pht" resources: @@ -234,7 +235,7 @@ rule build_per_energy_calibration: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "energy_cal")), log: - get_pattern_log_channel(setup, "par_pht_energy_cal"), + get_pattern_log_channel(setup, "par_pht_energy_cal", time), group: "par-pht" resources: @@ -344,6 +345,7 @@ for key, dataset in part.datasets.items(): partition, key, "pht", + time, name="par_pht_partcal", ), group: @@ -406,7 +408,7 @@ rule build_pht_energy_super_calibrations: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "partcal")), log: - get_pattern_log_channel(setup, "par_pht_partcal"), + get_pattern_log_channel(setup, "par_pht_partcal", time), group: "par-pht" resources: @@ -526,6 +528,7 @@ for key, dataset in part.datasets.items(): partition, key, "pht", + time, name="par_pht_aoe", ), group: @@ -588,7 +591,7 @@ rule build_pht_aoe_calibrations: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "aoecal")), log: - get_pattern_log_channel(setup, "par_pht_aoe_cal"), + get_pattern_log_channel(setup, "par_pht_aoe_cal", time), group: "par-pht" resources: @@ -706,6 +709,7 @@ for key, dataset in part.datasets.items(): partition, key, "pht", + time, name="par_pht_lq", ), group: @@ -763,7 +767,7 @@ rule build_pht_lq_calibration: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht")), log: - get_pattern_log_channel(setup, "par_pht_lq_cal"), + get_pattern_log_channel(setup, "par_pht_lq_cal", 
time), group: "par-pht" resources: @@ -893,7 +897,7 @@ rule build_pht: tier="pht", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, "tier_pht"), + get_pattern_log(setup, "tier_pht", time), group: "tier-pht" resources: diff --git a/workflow/rules/pht_fast.smk b/workflow/rules/pht_fast.smk index 75d8e7e..b017e89 100644 --- a/workflow/rules/pht_fast.smk +++ b/workflow/rules/pht_fast.smk @@ -96,6 +96,7 @@ for key, dataset in part.datasets.items(): partition, key, "pht", + time, name="par_pht_fast", ), group: @@ -156,7 +157,7 @@ rule par_pht_fast: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht")), log: - get_pattern_log_channel(setup, "par_pht_fast"), + get_pattern_log_channel(setup, "par_pht_fast", time), group: "par-pht" resources: diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index d55fbcc..02df934 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -169,7 +169,7 @@ rule build_psp: tier_file=get_pattern_tier(setup, "psp", check_in_cycle=check_in_cycle), db_file=get_pattern_pars_tmp(setup, "psp_db"), log: - get_pattern_log(setup, "tier_psp"), + get_pattern_log(setup, "tier_psp", time), group: "tier-dsp" resources: diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk index 9e14cad..a7938f4 100644 --- a/workflow/rules/psp_pars_geds.smk +++ b/workflow/rules/psp_pars_geds.smk @@ -87,6 +87,7 @@ for key, dataset in part.datasets.items(): partition, key, "psp", + time, name="par_psp", ), group: @@ -134,7 +135,7 @@ rule build_par_psp: ), psp_plots=temp(get_pattern_plts_tmp_channel(setup, "psp")), log: - get_pattern_log_channel(setup, "pars_psp"), + get_pattern_log_channel(setup, "pars_psp", time), group: "par-psp" resources: @@ -174,7 +175,7 @@ rule build_svm_psp: output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_psp_svm").replace("{datatype}", "cal"), + get_pattern_log(setup, "pars_psp_svm", time).replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -195,7 +196,7 @@ rule build_pars_psp_svm: output: dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")), log: - get_pattern_log_channel(setup, "pars_dsp_svm"), + get_pattern_log_channel(setup, "pars_dsp_svm", time), group: "par-dsp" resources: diff --git a/workflow/rules/qc_phy.smk b/workflow/rules/qc_phy.smk index 982ab4e..b04f1ef 100644 --- a/workflow/rules/qc_phy.smk +++ b/workflow/rules/qc_phy.smk @@ -57,6 +57,7 @@ for key, dataset in part.datasets.items(): partition, key, "pht", + time, name="par_pht_qc_phy", ), group: @@ -100,7 +101,7 @@ rule build_pht_qc_phy: hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "qcphy")), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "qcphy")), log: - get_pattern_log_channel(setup, "pars_pht_qc_phy"), + get_pattern_log_channel(setup, "pars_pht_qc_phy", time), group: "par-pht" resources: diff --git a/workflow/rules/raw.smk b/workflow/rules/raw.smk index f647095..4bd1e7a 100644 --- a/workflow/rules/raw.smk +++ b/workflow/rules/raw.smk @@ -32,7 +32,7 @@ rule build_raw_orca: output: get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), log: - get_pattern_log(setup, "tier_raw"), + get_pattern_log(setup, "tier_raw", time), group: "tier-raw" resources: @@ -62,7 +62,7 @@ rule build_raw_fcio: output: get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), log: - get_pattern_log(setup, "tier_raw"), + get_pattern_log(setup, "tier_raw", time), group: "tier-raw" resources: @@ -96,7 +96,7 @@ rule build_raw_blind: 
output: get_pattern_tier_raw_blind(setup), log: - str(get_pattern_log(setup, "tier_raw_blind")).replace("{datatype}", "phy"), + str(get_pattern_log(setup, "tier_raw_blind", time)).replace("{datatype}", "phy"), group: "tier-raw" resources: diff --git a/workflow/rules/skm.smk b/workflow/rules/skm.smk index d3c5d51..ac05738 100644 --- a/workflow/rules/skm.smk +++ b/workflow/rules/skm.smk @@ -20,7 +20,7 @@ rule build_skm: datatype="phy", ro_input=lambda _, input: ro(input), log: - get_pattern_log_concat(setup, "tier_skm"), + get_pattern_log_concat(setup, "tier_skm", time), group: "tier-skm" resources: diff --git a/workflow/rules/tcm.smk b/workflow/rules/tcm.smk index 6fa85a9..2bc1686 100644 --- a/workflow/rules/tcm.smk +++ b/workflow/rules/tcm.smk @@ -21,7 +21,7 @@ rule build_tier_tcm: output: get_pattern_tier(setup, "tcm", check_in_cycle=check_in_cycle), log: - get_pattern_log(setup, "tier_tcm"), + get_pattern_log(setup, "tier_tcm", time), group: "tier-tcm" resources: @@ -51,7 +51,7 @@ rule build_pulser_ids: output: pulser=temp(get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids")), log: - get_pattern_log_channel(setup, "tcm_pulsers"), + get_pattern_log_channel(setup, "tcm_pulsers", time), group: "tier-tcm" resources: diff --git a/workflow/src/legenddataflow/cal_grouping.py b/workflow/src/legenddataflow/cal_grouping.py index ce06c1d..5c19ea7 100644 --- a/workflow/src/legenddataflow/cal_grouping.py +++ b/workflow/src/legenddataflow/cal_grouping.py @@ -170,6 +170,7 @@ def get_log_file( dataset, channel, tier, + processing_timestamp, experiment="l200", datatype="cal", name=None, @@ -188,7 +189,9 @@ def get_log_file( fk.channel = "{channel}" else: fk.channel = channel - return fk.get_path_from_filekey(get_pattern_log_channel(self.setup, name))[0] + return fk.get_path_from_filekey( + get_pattern_log_channel(self.setup, name, processing_timestamp) + )[0] def get_timestamp( self, catalog, dataset, channel, tier, experiment="l200", datatype="cal" diff --git a/workflow/src/legenddataflow/patterns.py b/workflow/src/legenddataflow/patterns.py index 71f5db4..b05be0a 100644 --- a/workflow/src/legenddataflow/patterns.py +++ b/workflow/src/legenddataflow/patterns.py @@ -289,9 +289,10 @@ def get_pattern_plts(setup, tier, name=None): ) -def get_pattern_log(setup, processing_step): +def get_pattern_log(setup, processing_step, time): return ( Path(f"{tmp_log_path(setup)}") + / time / processing_step / ( "{experiment}-{period}-{run}-{datatype}-{timestamp}-" @@ -301,9 +302,10 @@ def get_pattern_log(setup, processing_step): ) -def get_pattern_log_channel(setup, processing_step): +def get_pattern_log_channel(setup, processing_step, time): return ( Path(f"{tmp_log_path(setup)}") + / time / processing_step / ( "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" @@ -313,9 +315,10 @@ def get_pattern_log_channel(setup, processing_step): ) -def get_pattern_log_concat(setup, processing_step): +def get_pattern_log_concat(setup, processing_step, time): return ( Path(f"{tmp_log_path(setup)}") + / time / processing_step / ("{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log") ) From faf9214dbed37d3e2ff60953aed6237a8046a070 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 3 Feb 2025 23:06:50 +0100 Subject: [PATCH 077/101] setup to config and debug --- workflow/rules/ann.smk | 20 ++-- workflow/rules/blinding_calibration.smk | 18 ++-- workflow/rules/blinding_check.smk | 18 ++-- workflow/rules/chanlist_gen.smk | 42 ++++---- workflow/rules/common.smk | 28 +++--- workflow/rules/dsp.smk | 38 ++++---- 
workflow/rules/dsp_pars_geds.smk | 88 ++++++++--------- workflow/rules/evt.smk | 42 ++++---- workflow/rules/filelist_gen.smk | 36 +++---- workflow/rules/hit.smk | 96 +++++++++---------- workflow/rules/main.smk | 12 +-- workflow/rules/pht.smk | 122 ++++++++++++------------ workflow/rules/pht_fast.smk | 18 ++-- workflow/rules/psp.smk | 38 ++++---- workflow/rules/psp_pars_geds.smk | 42 ++++---- workflow/rules/qc_phy.smk | 18 ++-- workflow/rules/raw.smk | 26 ++--- workflow/rules/skm.smk | 6 +- workflow/rules/tcm.smk | 12 +-- 19 files changed, 361 insertions(+), 359 deletions(-) diff --git a/workflow/rules/ann.smk b/workflow/rules/ann.smk index d572b06..7a50005 100644 --- a/workflow/rules/ann.smk +++ b/workflow/rules/ann.smk @@ -13,16 +13,16 @@ from legenddataflow.patterns import ( rule build_ann: input: - dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), + dsp_file=get_pattern_tier(config, "dsp", check_in_cycle=False), pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"), params: timestamp="{timestamp}", datatype="{datatype}", output: - tier_file=get_pattern_tier(setup, "ann", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "ann_db"), + tier_file=get_pattern_tier(config, "ann", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "ann_db"), log: - get_pattern_log(setup, "tier_ann", time), + get_pattern_log(config, "tier_ann", time), group: "tier-ann" resources: @@ -30,7 +30,7 @@ rule build_ann: mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: "{swenv} python3 -B " - f"{workflow.source_path('../scripts/build_dsp.py')} " + "{basedir}/../scripts/build_dsp.py " "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -45,16 +45,16 @@ rule build_ann: rule build_pan: input: - dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False), + dsp_file=get_pattern_tier(config, "psp", check_in_cycle=False), pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"), params: timestamp="{timestamp}", datatype="{datatype}", output: - tier_file=get_pattern_tier(setup, "pan", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "pan_db"), + tier_file=get_pattern_tier(config, "pan", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "pan_db"), log: - get_pattern_log(setup, "tier_pan", time), + get_pattern_log(config, "tier_pan", time), group: "tier-ann" resources: @@ -62,7 +62,7 @@ rule build_pan: mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: "{swenv} python3 -B " - f"{workflow.source_path('../scripts/build_dsp.py')} " + "{basedir}/../scripts/build_dsp.py " "--log {log} " "--configs {configs} " "--metadata {meta} " diff --git a/workflow/rules/blinding_calibration.smk b/workflow/rules/blinding_calibration.smk index 537a051..8407893 100644 --- a/workflow/rules/blinding_calibration.smk +++ b/workflow/rules/blinding_calibration.smk @@ -20,7 +20,7 @@ rule build_blinding_calibration: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=Path(filelist_path(setup)) + files=Path(filelist_path(config)) / "all-{experiment}-{period}-{run}-cal-raw.filelist", params: timestamp="{timestamp}", @@ -28,10 +28,10 @@ rule build_blinding_calibration: channel="{channel}", meta=meta, output: - par_file=temp(get_pattern_pars_tmp_channel(setup, "raw_blindcal")), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "raw_blindcal")), + 
par_file=temp(get_pattern_pars_tmp_channel(config, "raw_blindcal")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "raw_blindcal")), log: - get_pattern_log_channel(setup, "pars_hit_blind_cal", time), + get_pattern_log_channel(config, "pars_hit_blind_cal", time), group: "par-raw-blinding" resources: @@ -53,7 +53,7 @@ rule build_blinding_calibration: rule build_plts_blinding: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, @@ -62,7 +62,7 @@ rule build_plts_blinding: name="blindcal", ), output: - get_pattern_plts(setup, "raw", name="blindcal"), + get_pattern_plts(config, "raw", name="blindcal"), group: "merge-blindcal" shell: @@ -75,7 +75,7 @@ rule build_plts_blinding: rule build_pars_blinding: input: infiles=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, @@ -83,9 +83,9 @@ rule build_pars_blinding: chan_maps, name="blindcal", ), - plts=get_pattern_plts(setup, "raw", name="blindcal"), + plts=get_pattern_plts(config, "raw", name="blindcal"), output: - get_pattern_pars(setup, "raw", name="blindcal", check_in_cycle=check_in_cycle), + get_pattern_pars(config, "raw", name="blindcal", check_in_cycle=check_in_cycle), group: "merge-blindcal" shell: diff --git a/workflow/rules/blinding_check.smk b/workflow/rules/blinding_check.smk index ba552fc..916009f 100644 --- a/workflow/rules/blinding_check.smk +++ b/workflow/rules/blinding_check.smk @@ -21,7 +21,7 @@ rule build_blinding_check: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=Path(filelist_path(setup)) + files=Path(filelist_path(config)) / "all-{experiment}-{period}-{run}-cal-raw.filelist", par_file=get_blinding_curve_file, params: @@ -29,10 +29,10 @@ rule build_blinding_check: datatype="cal", channel="{channel}", output: - check_file=temp(get_pattern_pars_tmp_channel(setup, "raw")), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "raw")), + check_file=temp(get_pattern_pars_tmp_channel(config, "raw")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "raw")), log: - get_pattern_log_channel(setup, "pars_hit_blind_check", time), + get_pattern_log_channel(config, "pars_hit_blind_check", time), group: "par-hit" resources: @@ -55,7 +55,7 @@ rule build_blinding_check: rule build_plts_raw: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, @@ -63,7 +63,7 @@ rule build_plts_raw: chan_maps, ), output: - get_pattern_plts(setup, "raw"), + get_pattern_plts(config, "raw"), group: "merge-raw" shell: @@ -76,7 +76,7 @@ rule build_plts_raw: rule build_pars_raw: input: infiles=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, @@ -84,11 +84,11 @@ rule build_pars_raw: chan_maps, ), plts=get_pattern_plts( - setup, + config, "raw", ), output: - get_pattern_pars(setup, "raw", check_in_cycle=check_in_cycle), + get_pattern_pars(config, "raw", check_in_cycle=check_in_cycle), group: "merge-raw" shell: diff --git a/workflow/rules/chanlist_gen.smk b/workflow/rules/chanlist_gen.smk index 4e46f13..06200e3 100644 --- a/workflow/rules/chanlist_gen.smk +++ 
b/workflow/rules/chanlist_gen.smk @@ -9,16 +9,11 @@ from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, ) -from legenddataflow.utils import filelist_path, runcmd +from legenddataflow import execenv_smk_py_script +from legenddataflow.utils import filelist_path -def get_par_chanlist( - setup, keypart, tier, basedir, det_status, chan_maps, name=None, extension="yaml" -): - tier_pattern = "((?P[^_]+)(\\_(?P[^_]+)(\\_(?P[^_]+)?)?)?)?" - keypart_rx = re.compile(tier_pattern) - d = keypart_rx.match(tier).groupdict() - +def get_chanlist(setup, keypart, workflow, config, det_status, chan_maps): key = ChannelProcKey.parse_keypart(keypart) flist_path = filelist_path(setup) @@ -28,37 +23,36 @@ def get_par_chanlist( f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" - cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" + cmd = "create_chankeylist" # execenv_smk_py_script(workflow, config, )[0] + cmd += f" --det_status {det_status} --channelmap {chan_maps} --timestamp {key.timestamp} " + cmd += f"--datatype cal --output_file {output_file}" os.system(cmd) with open(output_file) as r: chan_list = r.read().splitlines() + os.remove(output_file) + return chan_list + + +def get_par_chanlist( + setup, keypart, tier, basedir, det_status, chan_maps, name=None, extension="yaml" +): + + chan_list = get_chanlist(setup, keypart, workflow, config, det_status, chan_maps) par_pattern = get_pattern_pars_tmp_channel(setup, tier, name, extension) filenames = ChannelProcKey.get_channel_files(keypart, par_pattern, chan_list) - os.remove(output_file) + return filenames def get_plt_chanlist(setup, keypart, tier, basedir, det_status, chan_maps, name=None): - key = ChannelProcKey.parse_keypart(keypart) - - output_file = os.path.join( - filelist_path(setup), - f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", - ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" - cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" - os.system(cmd) - - with open(output_file) as r: - chan_list = r.read().splitlines() + chan_list = get_chanlist(setup, keypart, workflow, config, det_status, chan_maps) par_pattern = get_pattern_plts_tmp_channel(setup, tier, name) filenames = ChannelProcKey.get_channel_files(keypart, par_pattern, chan_list) - os.remove(output_file) + return filenames diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 17571e3..5a9bff2 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -10,20 +10,20 @@ from legenddataflow import utils def ro(path): - return utils.as_ro(setup, path) + return utils.as_ro(config, path) def get_blinding_curve_file(wildcards): """func to get the blinding calibration curves from the overrides""" par_files = Catalog.get_files( - Path(patt.par_overwrite_path(setup)) / "raw" / "validity.yaml", + Path(patt.par_overwrite_path(config)) / "raw" / "validity.yaml", wildcards.timestamp, ) if isinstance(par_files, str): - return str(Path(patt.par_overwrite_path(setup)) / "raw" / par_files) + return str(Path(patt.par_overwrite_path(config)) / "raw" / par_files) else: return [ - str(Path(patt.par_overwrite_path(setup)) / "raw" / 
par_file) + str(Path(patt.par_overwrite_path(config)) / "raw" / par_file) for par_file in par_files ] @@ -31,13 +31,13 @@ def get_blinding_curve_file(wildcards): def get_blinding_check_file(wildcards): """func to get the right blinding check file""" par_files = Catalog.get_files( - Path(patt.get_pars_path(setup, "raw")) / "validity.yaml", wildcards.timestamp + Path(patt.get_pars_path(config, "raw")) / "validity.yaml", wildcards.timestamp ) if isinstance(par_files, str): - return Path(patt.get_pars_path(setup, "raw")) / par_files + return Path(patt.get_pars_path(config, "raw")) / par_files else: return [ - Path(patt.get_pars_path(setup, "raw")) / par_file for par_file in par_files + Path(patt.get_pars_path(config, "raw")) / par_file for par_file in par_files ] @@ -67,19 +67,19 @@ def set_last_rule_name(workflow, new_name): def get_input_par_file(wildcards, tier, name): - par_overwrite_file = Path(patt.par_overwrite_path(setup)) / tier / "validity.yaml" + par_overwrite_file = Path(patt.par_overwrite_path(config)) / tier / "validity.yaml" pars_files_overwrite = Catalog.get_files( par_overwrite_file, wildcards.timestamp, ) for pars_file in pars_files_overwrite: if name in str(pars_file): - return Path(patt.par_overwrite_path(setup)) / tier / pars_file + return Path(patt.par_overwrite_path(config)) / tier / pars_file raise ValueError(f"Could not find model in {pars_files_overwrite}") def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): - par_overwrite_file = Path(patt.par_overwrite_path(setup)) / tier / "validity.yaml" + par_overwrite_file = Path(patt.par_overwrite_path(config)) / tier / "validity.yaml" if timestamp is not None: pars_files_overwrite = Catalog.get_files( par_overwrite_file, @@ -97,7 +97,7 @@ def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): out_files = [] for pars_file in pars_files_overwrite: if fullname in str(pars_file): - out_files.append(Path(patt.par_overwrite_path(setup)) / tier / pars_file) + out_files.append(Path(patt.par_overwrite_path(config)) / tier / pars_file) if len(out_files) == 0: raise ValueError(f"Could not find name in {pars_files_overwrite}") else: @@ -109,8 +109,8 @@ def get_search_pattern(tier): This func gets the search pattern for the relevant tier passed. 
""" if tier == "daq": - return patt.get_pattern_tier_daq_unsorted(setup, extension="*") + return patt.get_pattern_tier_daq_unsorted(config, extension="*") elif tier == "raw": - return patt.get_pattern_tier_daq(setup, extension="*") + return patt.get_pattern_tier_daq(config, extension="*") else: - return patt.get_pattern_tier(setup, "raw", check_in_cycle=False) + return patt.get_pattern_tier(config, "raw", check_in_cycle=False) diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index d40de3d..8340dc8 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -18,11 +18,11 @@ from legenddataflow.patterns import ( dsp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier(config, "raw", check_in_cycle=False), {"cal": ["par_dsp"], "lar": ["par_dsp"]}, ) -dsp_par_cat_file = Path(pars_path(setup)) / "dsp" / "validity.yaml" +dsp_par_cat_file = Path(pars_path(config)) / "dsp" / "validity.yaml" if dsp_par_cat_file.is_file(): dsp_par_cat_file.unlink() Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) @@ -32,7 +32,7 @@ ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) rule build_plts_dsp: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, @@ -43,7 +43,7 @@ rule build_plts_dsp: timestamp="{timestamp}", datatype="cal", output: - get_pattern_plts(setup, "dsp"), + get_pattern_plts(config, "dsp"), group: "merge-dsp" shell: @@ -57,7 +57,7 @@ rule build_plts_dsp: rule build_pars_dsp_objects: input: lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, @@ -71,7 +71,7 @@ rule build_pars_dsp_objects: datatype="cal", output: get_pattern_pars( - setup, + config, "dsp", name="objects", extension="dir", @@ -91,7 +91,7 @@ rule build_pars_dsp_objects: rule build_pars_dsp_db: input: lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, @@ -104,7 +104,7 @@ rule build_pars_dsp_db: output: temp( get_pattern_pars_tmp( - setup, + config, "dsp", datatype="cal", ) @@ -123,7 +123,7 @@ rule build_pars_dsp_db: rule build_pars_dsp: input: in_files=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, @@ -133,13 +133,13 @@ rule build_pars_dsp: extension="lh5", ), in_db=get_pattern_pars_tmp( - setup, + config, "dsp", datatype="cal", ), - plts=get_pattern_plts(setup, "dsp"), + plts=get_pattern_plts(config, "dsp"), objects=get_pattern_pars( - setup, + config, "dsp", name="objects", extension="dir", @@ -150,12 +150,12 @@ rule build_pars_dsp: datatype="cal", output: out_file=get_pattern_pars( - setup, + config, "dsp", extension="lh5", check_in_cycle=check_in_cycle, ), - out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), + out_db=get_pattern_pars(config, "dsp", check_in_cycle=check_in_cycle), group: "merge-dsp" shell: @@ -171,10 +171,10 @@ rule build_pars_dsp: rule build_dsp: input: - raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), + raw_file=get_pattern_tier(config, "raw", check_in_cycle=False), pars_file=ancient( lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "dsp" + 
config, wildcards.timestamp, "dsp" ) ), params: @@ -182,10 +182,10 @@ rule build_dsp: datatype="{datatype}", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, output: - tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "dsp_db"), + tier_file=get_pattern_tier(config, "dsp", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "dsp_db"), log: - get_pattern_log(setup, "tier_dsp", time), + get_pattern_log(config, "tier_dsp", time), group: "tier-dsp" resources: diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index 8f030cb..9fe1391 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -4,19 +4,19 @@ Snakemake rules for building dsp pars for HPGes, before running build_dsp() - extraction of energy filter parameters and charge trapping correction for each channel from cal data """ -from legenddataflow.create_pars_keylist import pars_key_resolve +from legenddataflow.create_pars_keylist import ParsKeyResolve from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_tier_raw, + get_pattern_tier, get_pattern_log, get_pattern_pars, ) -dsp_par_catalog = pars_key_resolve.get_par_catalog( +dsp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(config, "raw", check_in_cycle=False), {"cal": ["par_dsp"], "lar": ["par_dsp"]}, ) @@ -24,18 +24,18 @@ dsp_par_catalog = pars_key_resolve.get_par_catalog( rule build_pars_dsp_tau_geds: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-cal-raw.filelist" ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - decay_const=temp(get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant")), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant")), + decay_const=temp(get_pattern_pars_tmp_channel(config, "dsp", "decay_constant")), + plots=temp(get_pattern_plts_tmp_channel(config, "dsp", "decay_constant")), log: - get_pattern_log_channel(setup, "par_dsp_decay_constant", time), + get_pattern_log_channel(config, "par_dsp_decay_constant", time), group: "par-dsp" resources: @@ -57,19 +57,19 @@ rule build_pars_dsp_tau_geds: rule build_pars_evtsel_geds: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-cal-raw.filelist" ), - pulser_file=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), + pulser_file=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + database=get_pattern_pars_tmp_channel(config, "dsp", "decay_constant"), raw_cal=get_blinding_curve_file, params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - peak_file=temp(get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5")), + peak_file=temp(get_pattern_pars_tmp_channel(config, "dsp", "peaks", "lh5")), log: - get_pattern_log_channel(setup, "par_dsp_event_selection", time), + get_pattern_log_channel(config, "par_dsp_event_selection", time), group: "par-dsp" resources: @@ -94,21 +94,21 @@ rule build_pars_evtsel_geds: rule 
build_pars_dsp_nopt_geds: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-fft-raw.filelist" ), - database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), - inplots=get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant"), + database=get_pattern_pars_tmp_channel(config, "dsp", "decay_constant"), + inplots=get_pattern_plts_tmp_channel(config, "dsp", "decay_constant"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: dsp_pars_nopt=temp( - get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization") + get_pattern_pars_tmp_channel(config, "dsp", "noise_optimization") ), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization")), + plots=temp(get_pattern_plts_tmp_channel(config, "dsp", "noise_optimization")), log: - get_pattern_log_channel(setup, "par_dsp_noise_optimization", time), + get_pattern_log_channel(config, "par_dsp_noise_optimization", time), group: "par-dsp" resources: @@ -132,23 +132,23 @@ rule build_pars_dsp_nopt_geds: rule build_pars_dsp_dplms_geds: input: fft_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-fft-raw.filelist" ), - peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), - database=get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization"), - inplots=get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization"), + peak_file=get_pattern_pars_tmp_channel(config, "dsp", "peaks", "lh5"), + database=get_pattern_pars_tmp_channel(config, "dsp", "noise_optimization"), + inplots=get_pattern_plts_tmp_channel(config, "dsp", "noise_optimization"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp", "dplms")), + dsp_pars=temp(get_pattern_pars_tmp_channel(config, "dsp", "dplms")), lh5_path=temp( - get_pattern_pars_tmp_channel(setup, "dsp", "dplms", extension="lh5") + get_pattern_pars_tmp_channel(config, "dsp", "dplms", extension="lh5") ), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "dplms")), + plots=temp(get_pattern_plts_tmp_channel(config, "dsp", "dplms")), log: - get_pattern_log_channel(setup, "pars_dsp_dplms", time), + get_pattern_log_channel(config, "pars_dsp_dplms", time), group: "par-dsp" resources: @@ -173,21 +173,21 @@ rule build_pars_dsp_dplms_geds: # This rule builds the optimal energy filter parameters for the dsp using calibration dsp files rule build_pars_dsp_eopt_geds: input: - peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), - decay_const=get_pattern_pars_tmp_channel(setup, "dsp", "dplms"), - inplots=get_pattern_plts_tmp_channel(setup, "dsp", "dplms"), + peak_file=get_pattern_pars_tmp_channel(config, "dsp", "peaks", "lh5"), + decay_const=get_pattern_pars_tmp_channel(config, "dsp", "dplms"), + inplots=get_pattern_plts_tmp_channel(config, "dsp", "dplms"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp_eopt")), + dsp_pars=temp(get_pattern_pars_tmp_channel(config, "dsp_eopt")), qbb_grid=temp( - get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl") + get_pattern_pars_tmp_channel(config, "dsp", "objects", extension="pkl") ), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp")), + plots=temp(get_pattern_plts_tmp_channel(config, "dsp")), log: - 
get_pattern_log_channel(setup, "pars_dsp_eopt", time), + get_pattern_log_channel(config, "pars_dsp_eopt", time), group: "par-dsp" resources: @@ -210,14 +210,16 @@ rule build_pars_dsp_eopt_geds: rule build_svm_dsp_geds: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( wildcards, "dsp", "svm_hyperpars" + ), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "dsp", "svm_hyperpars") ).replace("hyperpars.json", "train.lh5"), output: - dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), + dsp_pars=get_pattern_pars(config, "dsp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_dsp_svm").replace("{datatype}", "cal", time), + str(get_pattern_log(config, "pars_dsp_svm", time)).replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -233,12 +235,12 @@ rule build_svm_dsp_geds: rule build_pars_dsp_svm_geds: input: - dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp_eopt"), - svm_file=get_pattern_pars(setup, "dsp", "svm", "pkl"), + dsp_pars=get_pattern_pars_tmp_channel(config, "dsp_eopt"), + svm_file=get_pattern_pars(config, "dsp", "svm", "pkl"), output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), + dsp_pars=temp(get_pattern_pars_tmp_channel(config, "dsp")), log: - get_pattern_log_channel(setup, "pars_dsp_svm", time), + get_pattern_log_channel(config, "pars_dsp_svm", time), group: "par-dsp" resources: diff --git a/workflow/rules/evt.smk b/workflow/rules/evt.smk index 02b7849..bb90ce8 100644 --- a/workflow/rules/evt.smk +++ b/workflow/rules/evt.smk @@ -13,29 +13,29 @@ from legenddataflow.patterns import ( rule build_evt: input: - dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), - hit_file=get_pattern_tier(setup, "hit", check_in_cycle=False), - tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + dsp_file=get_pattern_tier(config, "dsp", check_in_cycle=False), + hit_file=get_pattern_tier(config, "hit", check_in_cycle=False), + tcm_file=get_pattern_tier(config, "tcm", check_in_cycle=False), ann_file=lambda wildcards: ( None if int(wildcards["period"][1:]) > 11 - else get_pattern_tier(setup, "ann", check_in_cycle=False) + else get_pattern_tier(config, "ann", check_in_cycle=False) ), par_files=lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "hit" + config, wildcards.timestamp, "hit" ), xtalk_matrix=lambda wildcards: get_input_par_file( tier="evt", wildcards=wildcards, name="xtc" ), output: - get_pattern_tier(setup, "evt", check_in_cycle=check_in_cycle), + get_pattern_tier(config, "evt", check_in_cycle=check_in_cycle), params: timestamp="{timestamp}", datatype="{datatype}", tier="evt", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, f"tier_evt", time), + get_pattern_log(config, f"tier_evt", time), group: "tier-evt" resources: @@ -66,29 +66,29 @@ rule build_evt: rule build_pet: input: - dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False), - hit_file=get_pattern_tier(setup, "pht", check_in_cycle=False), - tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + dsp_file=get_pattern_tier(config, "psp", check_in_cycle=False), + hit_file=get_pattern_tier(config, "pht", check_in_cycle=False), + tcm_file=get_pattern_tier(config, "tcm", check_in_cycle=False), ann_file=lambda wildcards: ( None if int(wildcards["period"][1:]) > 11 - else get_pattern_tier(setup, "pan", check_in_cycle=False) + else get_pattern_tier(config, "pan", 
check_in_cycle=False) ), par_files=lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "pht" + config, wildcards.timestamp, "pht" ), xtalk_matrix=lambda wildcards: get_input_par_file( tier="pet", wildcards=wildcards, name="xtc" ), output: - get_pattern_tier(setup, "pet", check_in_cycle=check_in_cycle), + get_pattern_tier(config, "pet", check_in_cycle=check_in_cycle), params: timestamp="{timestamp}", datatype="{datatype}", tier="pet", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, f"tier_pet", time), + get_pattern_log(config, f"tier_pet", time), group: "tier-evt" resources: @@ -126,21 +126,23 @@ for evt_tier in ("evt", "pet"): lambda wildcards: sorted( get_filelist_full_wildcards( wildcards, - setup, - get_pattern_tier_raw(setup), + config, + get_pattern_tier_raw(config), tier, ignore_keys_file=os.path.join(configs, "ignore_keys.keylist"), ) ), output: - get_pattern_tier(setup, f"{evt_tier}_concat", check_in_cycle=check_in_cycle), + get_pattern_tier( + config, f"{evt_tier}_concat", check_in_cycle=check_in_cycle + ), params: timestamp="all", datatype="{datatype}", - lh5concat_exe=setup["paths"]["install"] + "/bin/lh5concat", - ro_input=lambda _, input: utils.as_ro(setup, input), + lh5concat_exe=config["paths"]["install"] + "/bin/lh5concat", + ro_input=lambda _, input: utils.as_ro(config, input), log: - get_pattern_log_concat(setup, f"tier_{evt_tier}_concat", time), + get_pattern_log_concat(config, f"tier_{evt_tier}_concat", time), group: "tier-evt" shell: diff --git a/workflow/rules/filelist_gen.smk b/workflow/rules/filelist_gen.smk index 32d6175..d92a5aa 100644 --- a/workflow/rules/filelist_gen.smk +++ b/workflow/rules/filelist_gen.smk @@ -105,30 +105,30 @@ def get_keys(keypart): return filekeys -def get_pattern(setup, tier): +def get_pattern(config, tier): """ Helper function to get the search pattern for the given tier, some tiers such as skm need to refer to a different pattern when looking for files as only phy files are taken to skm others are only taken to pet """ if tier == "blind": - fn_pattern = patt.get_pattern_tier(setup, "raw", check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(config, "raw", check_in_cycle=False) elif tier in ("skm", "pet_concat"): - fn_pattern = patt.get_pattern_tier(setup, "pet", check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(config, "pet", check_in_cycle=False) elif tier == "evt_concat": - fn_pattern = patt.get_pattern_tier(setup, "evt", check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(config, "evt", check_in_cycle=False) elif tier == "daq": - fn_pattern = patt.get_pattern_tier_daq(setup, extension="{ext}") + fn_pattern = patt.get_pattern_tier_daq(config, extension="{ext}") else: - fn_pattern = patt.get_pattern_tier(setup, tier, check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(config, tier, check_in_cycle=False) return fn_pattern -def concat_phy_filenames(setup, phy_filenames, tier): +def concat_phy_filenames(config, phy_filenames, tier): """ This function concatenates the files from the same run together """ - fn_pattern = patt.get_pattern(setup, tier) + fn_pattern = patt.get_pattern(config, tier) # group files by run sorted_phy_filenames = patt.run_grouper(phy_filenames) phy_filenames = [] @@ -136,7 +136,7 @@ def concat_phy_filenames(setup, phy_filenames, tier): for run in sorted_phy_filenames: key = FileKey.get_filekey_from_pattern(run[0], fn_pattern) out_key = FileKey.get_path_from_filekey( - key, patt.get_pattern_tier(setup, tier, 
check_in_cycle=False) + key, patt.get_pattern_tier(config, tier, check_in_cycle=False) )[0] phy_filenames.append(out_key) @@ -145,7 +145,7 @@ def concat_phy_filenames(setup, phy_filenames, tier): def build_filelist( - setup, + config, filekeys, search_pattern, tier, @@ -157,7 +157,7 @@ def build_filelist( and tier. It will ignore any keys in the ignore_keys list and only include the keys specified in the analysis_runs dict. """ - fn_pattern = get_pattern(setup, tier) + fn_pattern = get_pattern(config, tier) if ignore_keys is None: ignore_keys = [] @@ -177,11 +177,11 @@ def build_filelist( else: if tier == "blind" and _key.datatype in blind_datatypes: filename = FileKey.get_path_from_filekey( - _key, patt.get_pattern_tier_raw_blind(setup) + _key, patt.get_pattern_tier_raw_blind(config) ) elif tier == "skm": filename = FileKey.get_path_from_filekey( - _key, patt.get_pattern_tier(setup, "pet", check_in_cycle=False) + _key, patt.get_pattern_tier(config, "pet", check_in_cycle=False) ) elif tier == "daq": filename = FileKey.get_path_from_filekey( @@ -223,14 +223,14 @@ def build_filelist( if tier in concat_tiers: phy_filenames = concat_phy_filenames( - setup, phy_filenames, tier + config, phy_filenames, tier ) # concat phy files return phy_filenames + other_filenames def get_filelist( - wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None + wildcards, config, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): file_selection = wildcards.label.split("-", 1)[0] # remove the file selection from the keypart @@ -242,7 +242,7 @@ def get_filelist( filekeys = get_keys(keypart) return build_filelist( - setup, + config, filekeys, search_pattern, wildcards.tier, @@ -253,7 +253,7 @@ def get_filelist( def get_filelist_full_wildcards( wildcards, - setup, + config, search_pattern, tier, ignore_keys_file=None, @@ -268,7 +268,7 @@ def get_filelist_full_wildcards( filekeys = get_keys(keypart) return build_filelist( - setup, + config, filekeys, search_pattern, tier, diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index 1938779..9ae6f77 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -23,11 +23,11 @@ from legenddataflow.patterns import ( hit_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier(config, "raw", check_in_cycle=False), {"cal": ["par_hit"], "lar": ["par_hit"]}, ) -hit_par_cat_file = Path(pars_path(setup)) / "hit" / "validity.yaml" +hit_par_cat_file = Path(pars_path(config)) / "hit" / "validity.yaml" if hit_par_cat_file.is_file(): hit_par_cat_file.unlink() Path(hit_par_cat_file).parent.mkdir(parents=True, exist_ok=True) @@ -38,22 +38,22 @@ ParsKeyResolve.write_to_yaml(hit_par_catalog, hit_par_cat_file) rule build_qc: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" ), fft_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-fft-dsp.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-fft-dsp.filelist" ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), overwrite_files=lambda wildcards: get_overwrite_file("hit", wildcards), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - qc_file=temp(get_pattern_pars_tmp_channel(setup, "hit", "qc")), - 
plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit", "qc")), + qc_file=temp(get_pattern_pars_tmp_channel(config, "hit", "qc")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "qc")), log: - get_pattern_log_channel(setup, "pars_hit_qc", time), + get_pattern_log_channel(config, "pars_hit_qc", time), group: "par-hit" resources: @@ -79,30 +79,30 @@ rule build_qc: rule build_energy_calibration: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), ctc_dict=ancient( lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "dsp" + config, wildcards.timestamp, "dsp" ) ), - inplots=get_pattern_plts_tmp_channel(setup, "hit", "qc"), - in_hit_dict=get_pattern_pars_tmp_channel(setup, "hit", "qc"), + inplots=get_pattern_plts_tmp_channel(config, "hit", "qc"), + in_hit_dict=get_pattern_pars_tmp_channel(config, "hit", "qc"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - ecal_file=temp(get_pattern_pars_tmp_channel(setup, "hit", "energy_cal")), + ecal_file=temp(get_pattern_pars_tmp_channel(config, "hit", "energy_cal")), results_file=temp( get_pattern_pars_tmp_channel( - setup, "hit", "energy_cal_objects", extension="pkl" + config, "hit", "energy_cal_objects", extension="pkl" ) ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit", "energy_cal")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "energy_cal")), log: - get_pattern_log_channel(setup, "pars_hit_energy_cal", time), + get_pattern_log_channel(config, "pars_hit_energy_cal", time), group: "par-hit" resources: @@ -130,28 +130,28 @@ rule build_energy_calibration: rule build_aoe_calibration: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "hit", "energy_cal"), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "hit", "energy_cal"), eres_file=get_pattern_pars_tmp_channel( - setup, "hit", "energy_cal_objects", extension="pkl" + config, "hit", "energy_cal_objects", extension="pkl" ), - inplots=get_pattern_plts_tmp_channel(setup, "hit", "energy_cal"), + inplots=get_pattern_plts_tmp_channel(config, "hit", "energy_cal"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "hit", "aoe_cal")), + hit_pars=temp(get_pattern_pars_tmp_channel(config, "hit", "aoe_cal")), aoe_results=temp( get_pattern_pars_tmp_channel( - setup, "hit", "aoe_cal_objects", extension="pkl" + config, "hit", "aoe_cal_objects", extension="pkl" ) ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit", "aoe_cal")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "aoe_cal")), log: - get_pattern_log_channel(setup, "pars_hit_aoe_cal", time), + get_pattern_log_channel(config, "pars_hit_aoe_cal", time), group: "par-hit" resources: @@ -179,26 +179,26 @@ rule build_aoe_calibration: rule build_lq_calibration: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + filelist_path(config), 
"all-{experiment}-{period}-{run}-cal-dsp.filelist" ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "hit", "aoe_cal"), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "hit", "aoe_cal"), eres_file=get_pattern_pars_tmp_channel( - setup, "hit", "aoe_cal_objects", extension="pkl" + config, "hit", "aoe_cal_objects", extension="pkl" ), - inplots=get_pattern_plts_tmp_channel(setup, "hit", "aoe_cal"), + inplots=get_pattern_plts_tmp_channel(config, "hit", "aoe_cal"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "hit")), + hit_pars=temp(get_pattern_pars_tmp_channel(config, "hit")), lq_results=temp( - get_pattern_pars_tmp_channel(setup, "hit", "objects", extension="pkl") + get_pattern_pars_tmp_channel(config, "hit", "objects", extension="pkl") ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit")), log: - get_pattern_log_channel(setup, "pars_hit_lq_cal", time), + get_pattern_log_channel(config, "pars_hit_lq_cal", time), group: "par-hit" resources: @@ -225,7 +225,7 @@ rule build_lq_calibration: rule build_pars_hit_objects: input: lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, @@ -236,7 +236,7 @@ rule build_pars_hit_objects: ), output: get_pattern_pars( - setup, + config, "hit", name="objects", extension="dir", @@ -257,7 +257,7 @@ rule build_pars_hit_objects: rule build_plts_hit: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, @@ -265,7 +265,7 @@ rule build_plts_hit: chan_maps, ), output: - get_pattern_plts(setup, "hit"), + get_pattern_plts(config, "hit"), params: ro_input=lambda _, input: ro(input), group: @@ -281,16 +281,16 @@ rule build_plts_hit: rule build_pars_hit: input: infiles=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, det_status, chan_maps, ), - plts=get_pattern_plts(setup, "hit"), + plts=get_pattern_plts(config, "hit"), objects=get_pattern_pars( - setup, + config, "hit", name="objects", extension="dir", @@ -299,7 +299,7 @@ rule build_pars_hit: params: ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, output: - get_pattern_pars(setup, "hit", check_in_cycle=check_in_cycle), + get_pattern_pars(config, "hit", check_in_cycle=check_in_cycle), group: "merge-hit" shell: @@ -312,20 +312,20 @@ rule build_pars_hit: rule build_hit: input: - dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), + dsp_file=get_pattern_tier(config, "dsp", check_in_cycle=False), pars_file=lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "hit" + config, wildcards.timestamp, "hit" ), output: - tier_file=get_pattern_tier(setup, "hit", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "hit_db"), + tier_file=get_pattern_tier(config, "hit", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "hit_db"), params: timestamp="{timestamp}", datatype="{datatype}", tier="hit", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, "tier_hit", 
time), + get_pattern_log(config, "tier_hit", time), group: "tier-hit" resources: diff --git a/workflow/rules/main.smk b/workflow/rules/main.smk index e0d886e..a78784d 100644 --- a/workflow/rules/main.smk +++ b/workflow/rules/main.smk @@ -38,15 +38,15 @@ rule autogen_output: - generate lists of valid keys """ input: - filelist=os.path.join(filelist_path(setup), "{label}-{tier}.filelist"), + filelist=os.path.join(filelist_path(config), "{label}-{tier}.filelist"), output: gen_output="{label}-{tier}.gen", - summary_log=log_path(setup) + "/summary-{label}-{tier}-" + timestamp + ".log", - warning_log=log_path(setup) + "/warning-{label}-{tier}-" + timestamp + ".log", + summary_log=log_path(config) + "/summary-{label}-{tier}-" + timestamp + ".log", + warning_log=log_path(config) + "/warning-{label}-{tier}-" + timestamp + ".log", params: - valid_keys_path=os.path.join(pars_path(setup), "valid_keys"), - filedb_path=os.path.join(pars_path(setup), "filedb"), - setup=lambda wildcards: setup, + valid_keys_path=os.path.join(pars_path(config), "valid_keys"), + filedb_path=os.path.join(pars_path(config), "filedb"), + setup=lambda wildcards: config, basedir=basedir, threads: min(workflow.cores, 64) script: diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index 937cd27..dd1deb4 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -21,13 +21,13 @@ from legenddataflow.patterns import ( get_pattern_pars, ) -pht_par_catalog = ds.ParsKeyResolve.get_par_catalog( +pht_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier(config, "raw", check_in_cycle=False), {"cal": ["par_pht"], "lar": ["par_pht"]}, ) -pht_par_cat_file = Path(pars_path(setup)) / "pht" / "validity.yaml" +pht_par_cat_file = Path(pars_path(config)) / "pht" / "validity.yaml" if pht_par_cat_file.is_file(): pht_par_cat_file.unlink() Path(pht_par_cat_file).parent.mkdir(parents=True, exist_ok=True) @@ -39,11 +39,11 @@ intier = "psp" rule pht_checkpoint: input: files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", ), output: - temp(get_pattern_pars_tmp_channel(setup, "pht", "check")), + temp(get_pattern_pars_tmp_channel(config, "pht", "check")), shell: "touch {output}" @@ -154,25 +154,25 @@ for key, dataset in part.datasets.items(): rule build_pht_qc: input: cal_files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", ), fft_files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-fft-" + f"{intier}.filelist", ), - pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - check_file=get_pattern_pars_tmp_channel(setup, "pht", "check"), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + check_file=get_pattern_pars_tmp_channel(config, "pht", "check"), overwrite_files=lambda wildcards: get_overwrite_file("pht", wildcards=wildcards), params: datatype="cal", channel="{channel}", timestamp="{timestamp}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "qc")), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "qc")), + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "qc")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "qc")), log: - get_pattern_log_channel(setup, "par_pht_qc", time), + get_pattern_log_channel(config, "par_pht_qc", time), group: "par-pht" resources: @@ 
-210,15 +210,15 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_per_energy_calibration: input: files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - pht_dict=get_pattern_pars_tmp_channel(setup, "pht", "qc"), - inplots=get_pattern_plts_tmp_channel(setup, "pht", "qc"), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + pht_dict=get_pattern_pars_tmp_channel(config, "pht", "qc"), + inplots=get_pattern_plts_tmp_channel(config, "pht", "qc"), ctc_dict=ancient( lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, intier + config, wildcards.timestamp, intier ) ), params: @@ -227,15 +227,15 @@ rule build_per_energy_calibration: channel="{channel}", tier="pht", output: - ecal_file=temp(get_pattern_pars_tmp_channel(setup, "pht", "energy_cal")), + ecal_file=temp(get_pattern_pars_tmp_channel(config, "pht", "energy_cal")), results_file=temp( get_pattern_pars_tmp_channel( - setup, "pht", "energy_cal_objects", extension="pkl" + config, "pht", "energy_cal_objects", extension="pkl" ) ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "energy_cal")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "energy_cal")), log: - get_pattern_log_channel(setup, "par_pht_energy_cal", time), + get_pattern_log_channel(config, "par_pht_energy_cal", time), group: "par-pht" resources: @@ -386,29 +386,29 @@ for key, dataset in part.datasets.items(): rule build_pht_energy_super_calibrations: input: files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-cal" + f"-{intier}.filelist", ), - pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "energy_cal"), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "energy_cal"), eres_file=get_pattern_pars_tmp_channel( - setup, "pht", "energy_cal_objects", extension="pkl" + config, "pht", "energy_cal_objects", extension="pkl" ), - inplots=get_pattern_plts_tmp_channel(setup, "pht", "energy_cal"), + inplots=get_pattern_plts_tmp_channel(config, "pht", "energy_cal"), params: datatype="cal", channel="{channel}", timestamp="{timestamp}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "partcal")), + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "partcal")), partcal_results=temp( get_pattern_pars_tmp_channel( - setup, "pht", "partcal_objects", extension="pkl" + config, "pht", "partcal_objects", extension="pkl" ) ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "partcal")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "partcal")), log: - get_pattern_log_channel(setup, "par_pht_partcal", time), + get_pattern_log_channel(config, "par_pht_partcal", time), group: "par-pht" resources: @@ -569,29 +569,29 @@ for key, dataset in part.datasets.items(): rule build_pht_aoe_calibrations: input: files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", ), - pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "partcal"), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "partcal"), 
eres_file=get_pattern_pars_tmp_channel( - setup, "pht", "partcal_objects", extension="pkl" + config, "pht", "partcal_objects", extension="pkl" ), - inplots=get_pattern_plts_tmp_channel(setup, "pht", "partcal"), + inplots=get_pattern_plts_tmp_channel(config, "pht", "partcal"), params: datatype="cal", channel="{channel}", timestamp="{timestamp}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "aoecal")), + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "aoecal")), aoe_results=temp( get_pattern_pars_tmp_channel( - setup, "pht", "aoecal_objects", extension="pkl" + config, "pht", "aoecal_objects", extension="pkl" ) ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "aoecal")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "aoecal")), log: - get_pattern_log_channel(setup, "par_pht_aoe_cal", time), + get_pattern_log_channel(config, "par_pht_aoe_cal", time), group: "par-pht" resources: @@ -747,27 +747,27 @@ for key, dataset in part.datasets.items(): rule build_pht_lq_calibration: input: files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", ), - pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "aoecal"), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "aoecal"), eres_file=get_pattern_pars_tmp_channel( - setup, "pht", "aoecal_objects", extension="pkl" + config, "pht", "aoecal_objects", extension="pkl" ), - inplots=get_pattern_plts_tmp_channel(setup, "pht", "aoecal"), + inplots=get_pattern_plts_tmp_channel(config, "pht", "aoecal"), params: datatype="cal", channel="{channel}", timestamp="{timestamp}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht")), + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht")), lq_results=temp( - get_pattern_pars_tmp_channel(setup, "pht", "objects", extension="pkl") + get_pattern_pars_tmp_channel(config, "pht", "objects", extension="pkl") ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht")), log: - get_pattern_log_channel(setup, "par_pht_lq_cal", time), + get_pattern_log_channel(config, "par_pht_lq_cal", time), group: "par-pht" resources: @@ -806,7 +806,7 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_pars_pht_objects: input: lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, @@ -817,7 +817,7 @@ rule build_pars_pht_objects: ), output: get_pattern_pars( - setup, + config, "pht", name="objects", extension="dir", @@ -835,7 +835,7 @@ rule build_pars_pht_objects: rule build_plts_pht: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, @@ -843,7 +843,7 @@ rule build_plts_pht: chan_maps, ), output: - get_pattern_plts(setup, "pht"), + get_pattern_plts(config, "pht"), group: "merge-hit" shell: @@ -856,23 +856,23 @@ rule build_plts_pht: rule build_pars_pht: input: infiles=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, det_status, chan_maps, ), - plts=get_pattern_plts(setup, "pht"), + plts=get_pattern_plts(config, 
"pht"), objects=get_pattern_pars( - setup, + config, "pht", name="objects", extension="dir", check_in_cycle=check_in_cycle, ), output: - get_pattern_pars(setup, "pht", check_in_cycle=check_in_cycle), + get_pattern_pars(config, "pht", check_in_cycle=check_in_cycle), group: "merge-hit" shell: @@ -884,20 +884,20 @@ rule build_pars_pht: rule build_pht: input: - dsp_file=get_pattern_tier(setup, intier, check_in_cycle=False), + dsp_file=get_pattern_tier(config, intier, check_in_cycle=False), pars_file=lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "pht" + config, wildcards.timestamp, "pht" ), output: - tier_file=get_pattern_tier(setup, "pht", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "pht_db"), + tier_file=get_pattern_tier(config, "pht", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "pht_db"), params: timestamp="{timestamp}", datatype="{datatype}", tier="pht", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, "tier_pht", time), + get_pattern_log(config, "tier_pht", time), group: "tier-pht" resources: diff --git a/workflow/rules/pht_fast.smk b/workflow/rules/pht_fast.smk index b017e89..75f7a47 100644 --- a/workflow/rules/pht_fast.smk +++ b/workflow/rules/pht_fast.smk @@ -137,27 +137,27 @@ for key, dataset in part.datasets.items(): rule par_pht_fast: input: files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-cal" + f"-{intier}.filelist", ), - pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "energy_cal"), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "energy_cal"), eres_file=get_pattern_pars_tmp_channel( - setup, "pht", "energy_cal_objects", extension="pkl" + config, "pht", "energy_cal_objects", extension="pkl" ), - inplots=get_pattern_plts_tmp_channel(setup, "pht", "energy_cal"), + inplots=get_pattern_plts_tmp_channel(config, "pht", "energy_cal"), params: datatype="cal", channel="{channel}", timestamp="{timestamp}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht")), + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht")), partcal_results=temp( - get_pattern_pars_tmp_channel(setup, "pht", "objects", extension="pkl") + get_pattern_pars_tmp_channel(config, "pht", "objects", extension="pkl") ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht")), log: - get_pattern_log_channel(setup, "par_pht_fast", time), + get_pattern_log_channel(config, "par_pht_fast", time), group: "par-pht" resources: diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index 02df934..456d30a 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -17,11 +17,11 @@ from legenddataflow.patterns import ( psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier(config, "raw", check_in_cycle=False), {"cal": ["par_psp"], "lar": ["par_psp"]}, ) -psp_par_cat_file = Path(pars_path(setup)) / "psp" / "validity.yaml" +psp_par_cat_file = Path(pars_path(config)) / "psp" / "validity.yaml" if psp_par_cat_file.is_file(): psp_par_cat_file.unlink() Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) @@ -31,7 +31,7 @@ ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) rule build_pars_psp_objects: 
input: lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, @@ -42,7 +42,7 @@ rule build_pars_psp_objects: ), output: get_pattern_pars( - setup, + config, "psp", name="objects", extension="dir", @@ -61,7 +61,7 @@ rule build_pars_psp_objects: rule build_plts_psp: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, @@ -69,7 +69,7 @@ rule build_plts_psp: chan_maps, ), output: - get_pattern_plts(setup, "psp"), + get_pattern_plts(config, "psp"), group: "merge-psp" shell: @@ -83,7 +83,7 @@ rule build_plts_psp: rule build_pars_psp_db: input: lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, @@ -93,7 +93,7 @@ rule build_pars_psp_db: output: temp( get_pattern_pars_tmp( - setup, + config, "psp", datatype="cal", ) @@ -111,7 +111,7 @@ rule build_pars_psp_db: rule build_pars_psp: input: in_files=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, @@ -121,13 +121,13 @@ rule build_pars_psp: extension="lh5", ), in_db=get_pattern_pars_tmp( - setup, + config, "psp", datatype="cal", ), - plts=get_pattern_plts(setup, "psp"), + plts=get_pattern_plts(config, "psp"), objects=get_pattern_pars( - setup, + config, "psp", name="objects", extension="dir", @@ -135,12 +135,12 @@ rule build_pars_psp: ), output: out_file=get_pattern_pars( - setup, + config, "psp", extension="lh5", check_in_cycle=check_in_cycle, ), - out_db=get_pattern_pars(setup, "psp", check_in_cycle=check_in_cycle), + out_db=get_pattern_pars(config, "psp", check_in_cycle=check_in_cycle), group: "merge-psp" shell: @@ -155,10 +155,10 @@ rule build_pars_psp: rule build_psp: input: - raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), + raw_file=get_pattern_tier(config, "raw", check_in_cycle=False), pars_file=ancient( lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "psp" + config, wildcards.timestamp, "psp" ) ), params: @@ -166,10 +166,10 @@ rule build_psp: datatype="{datatype}", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, output: - tier_file=get_pattern_tier(setup, "psp", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "psp_db"), + tier_file=get_pattern_tier(config, "psp", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "psp_db"), log: - get_pattern_log(setup, "tier_psp", time), + get_pattern_log(config, "tier_psp", time), group: "tier-dsp" resources: diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk index a7938f4..6ac7e05 100644 --- a/workflow/rules/psp_pars_geds.smk +++ b/workflow/rules/psp_pars_geds.smk @@ -4,20 +4,20 @@ Snakemake rules for processing psp (partition dsp) tier data. 
- extraction of psd calibration parameters and partition level energy fitting for each channel over whole partition from cal data """ -from legenddataflow.pars_loading import pars_catalog -from legenddataflow.create_pars_keylist import pars_key_resolve -from legenddataflow.utils import par_psp_path, par_dsp_path, set_last_rule_name +from legenddataflow.create_pars_keylist import ParsKeyResolve +from legenddataflow.utils import set_last_rule_name from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, get_pattern_log, get_pattern_pars, + get_pattern_tier, ) -psp_par_catalog = pars_key_resolve.get_par_catalog( +psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(config, "raw", check_in_cycle=False), {"cal": ["par_psp"], "lar": ["par_psp"]}, ) @@ -121,21 +121,21 @@ for key, dataset in part.datasets.items(): # This rule builds the a/e calibration using the calibration dsp files for the whole partition rule build_par_psp: input: - dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp", "eopt"), - dsp_objs=get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl"), - dsp_plots=get_pattern_plts_tmp_channel(setup, "dsp"), + dsp_pars=get_pattern_pars_tmp_channel(config, "dsp", "eopt"), + dsp_objs=get_pattern_pars_tmp_channel(config, "dsp", "objects", extension="pkl"), + dsp_plots=get_pattern_plts_tmp_channel(config, "dsp"), params: datatype="cal", channel="{channel}", timestamp="{timestamp}", output: - psp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp", "eopt")), + psp_pars=temp(get_pattern_pars_tmp_channel(config, "psp", "eopt")), psp_objs=temp( - get_pattern_pars_tmp_channel(setup, "psp", "objects", extension="pkl") + get_pattern_pars_tmp_channel(config, "psp", "objects", extension="pkl") ), - psp_plots=temp(get_pattern_plts_tmp_channel(setup, "psp")), + psp_plots=temp(get_pattern_plts_tmp_channel(config, "psp")), log: - get_pattern_log_channel(setup, "pars_psp", time), + get_pattern_log_channel(config, "pars_psp", time), group: "par-psp" resources: @@ -168,14 +168,16 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_svm_psp: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( wildcards, "psp", "svm_hyperpars" + ), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "psp", "svm_hyperpars") ).replace("hyperpars.json", "train.lh5"), output: - dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), + dsp_pars=get_pattern_pars(config, "psp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_psp_svm", time).replace("{datatype}", "cal"), + str(get_pattern_log(config, "pars_psp_svm", time)).replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -191,12 +193,12 @@ rule build_svm_psp: rule build_pars_psp_svm: input: - dsp_pars=get_pattern_pars_tmp_channel(setup, "psp_eopt"), - svm_model=get_pattern_pars(setup, "psp", "svm", "pkl"), + dsp_pars=get_pattern_pars_tmp_channel(config, "psp_eopt"), + svm_model=get_pattern_pars(config, "psp", "svm", "pkl"), output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")), + dsp_pars=temp(get_pattern_pars_tmp_channel(config, "psp")), log: - get_pattern_log_channel(setup, "pars_dsp_svm", time), + get_pattern_log_channel(config, "pars_dsp_svm", time), group: "par-dsp" resources: diff --git a/workflow/rules/qc_phy.smk b/workflow/rules/qc_phy.smk index 
b04f1ef..522eb45 100644 --- a/workflow/rules/qc_phy.smk +++ b/workflow/rules/qc_phy.smk @@ -90,7 +90,7 @@ for key, dataset in part.datasets.items(): rule build_pht_qc_phy: input: phy_files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-phy-" + f"{intier}.filelist", ), params: @@ -98,10 +98,10 @@ rule build_pht_qc_phy: channel="{channel}", timestamp="{timestamp}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "qcphy")), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "qcphy")), + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "qcphy")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "qcphy")), log: - get_pattern_log_channel(setup, "pars_pht_qc_phy", time), + get_pattern_log_channel(config, "pars_pht_qc_phy", time), group: "par-pht" resources: @@ -134,7 +134,7 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_plts_pht_phy: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, @@ -143,7 +143,7 @@ rule build_plts_pht_phy: name="qcphy", ), output: - get_pattern_plts(setup, "pht", "qc_phy"), + get_pattern_plts(config, "pht", "qc_phy"), group: "merge-hit" shell: @@ -156,7 +156,7 @@ rule build_plts_pht_phy: rule build_pars_pht_phy: input: infiles=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, @@ -164,9 +164,9 @@ rule build_pars_pht_phy: chan_maps, name="qcphy", ), - plts=get_pattern_plts(setup, "pht", "qc_phy"), + plts=get_pattern_plts(config, "pht", "qc_phy"), output: - get_pattern_pars(setup, "pht", name="qc_phy", check_in_cycle=check_in_cycle), + get_pattern_pars(config, "pht", name="qc_phy", check_in_cycle=check_in_cycle), group: "merge-hit" shell: diff --git a/workflow/rules/raw.smk b/workflow/rules/raw.smk index 4bd1e7a..311c14c 100644 --- a/workflow/rules/raw.smk +++ b/workflow/rules/raw.smk @@ -11,9 +11,9 @@ from legenddataflow.create_pars_keylist import ParsKeyResolve raw_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], [ - get_pattern_tier_daq_unsorted(setup, extension="*"), - get_pattern_tier_daq(setup, extension="*"), - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier_daq_unsorted(config, extension="*"), + get_pattern_tier_daq(config, extension="*"), + get_pattern_tier(config, "raw", check_in_cycle=False), ], {"cal": ["par_raw"]}, ) @@ -24,15 +24,15 @@ rule build_raw_orca: This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file """ input: - get_pattern_tier_daq(setup, extension="orca"), + get_pattern_tier_daq(config, extension="orca"), params: timestamp="{timestamp}", datatype="{datatype}", ro_input=lambda _, input: ro(input), output: - get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + get_pattern_tier(config, "raw", check_in_cycle=check_in_cycle), log: - get_pattern_log(setup, "tier_raw", time), + get_pattern_log(config, "tier_raw", time), group: "tier-raw" resources: @@ -54,15 +54,15 @@ rule build_raw_fcio: This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file """ input: - get_pattern_tier_daq(setup, extension="fcio"), + get_pattern_tier_daq(config, extension="fcio"), params: timestamp="{timestamp}", datatype="{datatype}", ro_input=lambda _, input: ro(input), output: - get_pattern_tier(setup, "raw", 
check_in_cycle=check_in_cycle), + get_pattern_tier(config, "raw", check_in_cycle=check_in_cycle), log: - get_pattern_log(setup, "tier_raw", time), + get_pattern_log(config, "tier_raw", time), group: "tier-raw" resources: @@ -85,7 +85,7 @@ rule build_raw_blind: and runs only if the blinding check file is on disk. Output is just the blinded raw file. """ input: - tier_file=str(get_pattern_tier(setup, "raw", check_in_cycle=False)).replace( + tier_file=str(get_pattern_tier(config, "raw", check_in_cycle=False)).replace( "{datatype}", "phy" ), blind_file=get_blinding_curve_file, @@ -94,9 +94,11 @@ rule build_raw_blind: datatype="phy", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, output: - get_pattern_tier_raw_blind(setup), + get_pattern_tier_raw_blind(config), log: - str(get_pattern_log(setup, "tier_raw_blind", time)).replace("{datatype}", "phy"), + str(get_pattern_log(config, "tier_raw_blind", time)).replace( + "{datatype}", "phy" + ), group: "tier-raw" resources: diff --git a/workflow/rules/skm.smk b/workflow/rules/skm.smk index ac05738..91a8755 100644 --- a/workflow/rules/skm.smk +++ b/workflow/rules/skm.smk @@ -12,15 +12,15 @@ from legenddataflow.patterns import ( rule build_skm: input: - get_pattern_tier(setup, "pet_concat", check_in_cycle=False), + get_pattern_tier(config, "pet_concat", check_in_cycle=False), output: - get_pattern_tier(setup, "skm", check_in_cycle=check_in_cycle), + get_pattern_tier(config, "skm", check_in_cycle=check_in_cycle), params: timestamp="20230410T000000Z", datatype="phy", ro_input=lambda _, input: ro(input), log: - get_pattern_log_concat(setup, "tier_skm", time), + get_pattern_log_concat(config, "tier_skm", time), group: "tier-skm" resources: diff --git a/workflow/rules/tcm.smk b/workflow/rules/tcm.smk index 2bc1686..9d80d1b 100644 --- a/workflow/rules/tcm.smk +++ b/workflow/rules/tcm.smk @@ -13,15 +13,15 @@ from legenddataflow.patterns import ( # This rule builds the tcm files each raw file rule build_tier_tcm: input: - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier(config, "raw", check_in_cycle=False), params: timestamp="{timestamp}", datatype="{datatype}", input=lambda _, input: ro(input), output: - get_pattern_tier(setup, "tcm", check_in_cycle=check_in_cycle), + get_pattern_tier(config, "tcm", check_in_cycle=check_in_cycle), log: - get_pattern_log(setup, "tier_tcm", time), + get_pattern_log(config, "tier_tcm", time), group: "tier-tcm" resources: @@ -41,7 +41,7 @@ rule build_tier_tcm: rule build_pulser_ids: input: os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-cal-tcm.filelist" ), params: input=lambda _, input: ro(input), @@ -49,9 +49,9 @@ rule build_pulser_ids: datatype="cal", channel="{channel}", output: - pulser=temp(get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids")), + pulser=temp(get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids")), log: - get_pattern_log_channel(setup, "tcm_pulsers", time), + get_pattern_log_channel(config, "tcm_pulsers", time), group: "tier-tcm" resources: From 813e0709c7cfcb4ed9b209caa49ae45a2106fcfc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 00:06:30 +0100 Subject: [PATCH 078/101] scripts to functions and rename --- .../scripts/blinding_calibration.py | 115 ----- .../src/legenddataflow/scripts/build_dsp.py | 167 -------- .../src/legenddataflow/scripts/build_evt.py | 182 -------- .../src/legenddataflow/scripts/build_fdb.py | 84 ---- 
.../src/legenddataflow/scripts/build_hit.py | 96 ----- .../legenddataflow/scripts/build_raw_blind.py | 181 -------- .../legenddataflow/scripts/build_raw_fcio.py | 68 --- .../legenddataflow/scripts/build_raw_orca.py | 108 ----- .../src/legenddataflow/scripts/build_skm.py | 96 ----- .../src/legenddataflow/scripts/build_tcm.py | 53 --- .../legenddataflow/scripts/check_blinding.py | 108 ----- .../legenddataflow/scripts/complete_run.py | 4 +- .../scripts/create_chankeylist.py | 42 +- workflow/src/legenddataflow/scripts/filedb.py | 88 ++++ .../legenddataflow/scripts/merge_channels.py | 252 +++++------ .../scripts/par/geds/dsp/dplms.py | 150 +++++++ .../scripts/par/geds/dsp/eopt.py | 398 ++++++++++++++++++ .../geds/dsp/evtsel.py} | 8 +- .../scripts/par/geds/dsp/nopt.py | 112 +++++ .../scripts/par/geds/dsp/svm.py | 26 ++ .../scripts/par/geds/dsp/svm_build.py | 63 +++ .../scripts/par/geds/dsp/tau.py | 146 +++++++ .../scripts/par/geds/hit/aoe.py | 262 ++++++++++++ .../geds/hit/ecal.py} | 10 +- .../legenddataflow/scripts/par/geds/hit/lq.py | 230 ++++++++++ .../{pars_hit_qc.py => par/geds/hit/qc.py} | 9 +- .../geds/pht/aoe.py} | 117 ++--- .../geds/pht/ecal_part.py} | 4 +- .../geds/pht/fast.py} | 14 +- .../{pars_pht_lqcal.py => par/geds/pht/lq.py} | 0 .../{pars_pht_qc.py => par/geds/pht/qc.py} | 7 +- .../geds/pht/qc_phy.py} | 7 +- .../scripts/par/geds/psp/average.py | 160 +++++++ .../par/geds/raw/blinding_calibration.py | 119 ++++++ .../scripts/par/geds/raw/check_blinding.py | 114 +++++ .../scripts/par/geds/tcm/pars_tcm_pulser.py | 58 +++ .../legenddataflow/scripts/par_psp_geds.py | 157 ------- .../scripts/pars_dsp_build_svm_geds.py | 57 --- .../scripts/pars_dsp_dplms_geds.py | 148 ------- .../scripts/pars_dsp_eopt_geds.py | 395 ----------------- .../scripts/pars_dsp_nopt_geds.py | 108 ----- .../scripts/pars_dsp_svm_geds.py | 20 - .../scripts/pars_dsp_tau_geds.py | 139 ------ .../legenddataflow/scripts/pars_hit_aoe.py | 290 ------------- .../src/legenddataflow/scripts/pars_hit_lq.py | 283 ------------- .../legenddataflow/scripts/pars_tcm_pulser.py | 57 --- .../src/legenddataflow/scripts/tier/dsp.py | 171 ++++++++ .../src/legenddataflow/scripts/tier/evt.py | 187 ++++++++ .../src/legenddataflow/scripts/tier/hit.py | 98 +++++ .../legenddataflow/scripts/tier/raw_blind.py | 185 ++++++++ .../legenddataflow/scripts/tier/raw_fcio.py | 72 ++++ .../legenddataflow/scripts/tier/raw_orca.py | 110 +++++ .../src/legenddataflow/scripts/tier/skm.py | 96 +++++ .../src/legenddataflow/scripts/tier/tcm.py | 55 +++ 54 files changed, 3115 insertions(+), 3171 deletions(-) delete mode 100644 workflow/src/legenddataflow/scripts/blinding_calibration.py delete mode 100644 workflow/src/legenddataflow/scripts/build_dsp.py delete mode 100644 workflow/src/legenddataflow/scripts/build_evt.py delete mode 100644 workflow/src/legenddataflow/scripts/build_fdb.py delete mode 100644 workflow/src/legenddataflow/scripts/build_hit.py delete mode 100644 workflow/src/legenddataflow/scripts/build_raw_blind.py delete mode 100644 workflow/src/legenddataflow/scripts/build_raw_fcio.py delete mode 100644 workflow/src/legenddataflow/scripts/build_raw_orca.py delete mode 100644 workflow/src/legenddataflow/scripts/build_skm.py delete mode 100644 workflow/src/legenddataflow/scripts/build_tcm.py delete mode 100644 workflow/src/legenddataflow/scripts/check_blinding.py create mode 100644 workflow/src/legenddataflow/scripts/filedb.py create mode 100644 workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py create mode 100644 
workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py rename workflow/src/legenddataflow/scripts/{pars_dsp_evtsel_geds.py => par/geds/dsp/evtsel.py} (98%) create mode 100644 workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py create mode 100644 workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py create mode 100644 workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py create mode 100644 workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py create mode 100644 workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py rename workflow/src/legenddataflow/scripts/{pars_hit_ecal.py => par/geds/hit/ecal.py} (99%) create mode 100644 workflow/src/legenddataflow/scripts/par/geds/hit/lq.py rename workflow/src/legenddataflow/scripts/{pars_hit_qc.py => par/geds/hit/qc.py} (98%) rename workflow/src/legenddataflow/scripts/{pars_pht_aoecal.py => par/geds/pht/aoe.py} (84%) rename workflow/src/legenddataflow/scripts/{pars_pht_partcal.py => par/geds/pht/ecal_part.py} (99%) rename workflow/src/legenddataflow/scripts/{pars_pht_fast.py => par/geds/pht/fast.py} (95%) rename workflow/src/legenddataflow/scripts/{pars_pht_lqcal.py => par/geds/pht/lq.py} (100%) rename workflow/src/legenddataflow/scripts/{pars_pht_qc.py => par/geds/pht/qc.py} (98%) rename workflow/src/legenddataflow/scripts/{pars_pht_qc_phy.py => par/geds/pht/qc_phy.py} (97%) create mode 100644 workflow/src/legenddataflow/scripts/par/geds/psp/average.py create mode 100644 workflow/src/legenddataflow/scripts/par/geds/raw/blinding_calibration.py create mode 100644 workflow/src/legenddataflow/scripts/par/geds/raw/check_blinding.py create mode 100644 workflow/src/legenddataflow/scripts/par/geds/tcm/pars_tcm_pulser.py delete mode 100644 workflow/src/legenddataflow/scripts/par_psp_geds.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_dsp_svm_geds.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_hit_aoe.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_hit_lq.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_tcm_pulser.py create mode 100644 workflow/src/legenddataflow/scripts/tier/dsp.py create mode 100644 workflow/src/legenddataflow/scripts/tier/evt.py create mode 100644 workflow/src/legenddataflow/scripts/tier/hit.py create mode 100644 workflow/src/legenddataflow/scripts/tier/raw_blind.py create mode 100644 workflow/src/legenddataflow/scripts/tier/raw_fcio.py create mode 100644 workflow/src/legenddataflow/scripts/tier/raw_orca.py create mode 100644 workflow/src/legenddataflow/scripts/tier/skm.py create mode 100644 workflow/src/legenddataflow/scripts/tier/tcm.py diff --git a/workflow/src/legenddataflow/scripts/blinding_calibration.py b/workflow/src/legenddataflow/scripts/blinding_calibration.py deleted file mode 100644 index e4b79f2..0000000 --- a/workflow/src/legenddataflow/scripts/blinding_calibration.py +++ /dev/null @@ -1,115 +0,0 @@ -""" -This script applies a simple calibration to the daqenergy for all channels, -it does this using a peak search, matching the peaks to the given ones -and deriving a simple scaling relation from adc to keV. 
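Put differently, the whole calibration reduces to one multiplicative ADC-to-keV factor taken from the peak search (the script below stores it as roughpars[0] in the "daqenergy*a" expression). A minimal sketch of that scaling relation, with purely hypothetical numbers, for orientation only:

    # hypothetical illustration of the "simple scaling relation" described above
    peak_pos_adc = 13070.0      # assumed position of the 2614.5 keV peak in daqenergy ADC counts
    a = 2614.5 / peak_pos_adc   # keV per ADC count; the script's roughpars[0] plays this role
    daqenergy_cal = a * 5200.0  # applying the factor to an uncalibrated daqenergy value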
-""" - -import argparse -import logging -import pickle as pkl -from pathlib import Path - -import matplotlib as mpl -import matplotlib.pyplot as plt -import numpy as np -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from lgdo import lh5 -from pygama.pargen.energy_cal import HPGeCalibration - -mpl.use("agg") - -argparser = argparse.ArgumentParser() -argparser.add_argument("--files", help="files", nargs="*", type=str) - -argparser.add_argument("--blind_curve", help="blind_curve", type=str) -argparser.add_argument("--plot_file", help="out plot path", type=str) - -argparser.add_argument("--meta", help="meta", type=str) -argparser.add_argument("--configs", help="configs", type=str) -argparser.add_argument("--log", help="log", type=str) - -argparser.add_argument("--timestamp", help="timestamp", type=str) -argparser.add_argument("--datatype", help="datatype", type=str) -argparser.add_argument("--channel", help="channel", type=str) - -argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") -args = argparser.parse_args() - -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -log = logging.getLogger(__name__) - -# load in channel map -meta = LegendMetadata(args.meta, lazy=True) -chmap = meta.channelmap(args.timestamp) - -# if chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["is_blinded"] is True: -pars_dict = {} -# peaks to search for -peaks_keV = np.array( - [238, 583.191, 727.330, 860.564, 1592.53, 1620.50, 2103.53, 2614.50] -) - -E_uncal = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[0].view_as("np") -E_uncal = E_uncal[E_uncal > 200] -guess_keV = 2620 / np.nanpercentile(E_uncal, 99) # usual simple guess -Euc_min = peaks_keV[0] / guess_keV * 0.6 -Euc_max = peaks_keV[-1] / guess_keV * 1.1 -dEuc = 1 / guess_keV - -# daqenergy is an int so use integer binning (dx used to be bugged as output so switched to nbins) - - -hpge_cal = HPGeCalibration( - "daqenergy", - peaks_keV, - guess_keV, - 0, - uncal_is_int=True, - debug_mode=args.debug, -) - -# Run the rough peak search -detected_peaks_locs, detected_peaks_keV, roughpars = hpge_cal.hpge_find_E_peaks(E_uncal) - -log.info(f"{len(detected_peaks_locs)} peaks found:") -log.info("\t Energy | Position ") -for i, (Li, Ei) in enumerate(zip(detected_peaks_locs, detected_peaks_keV)): - log.info(f"\t{i}".ljust(4) + str(Ei).ljust(9) + f"| {Li:g}".ljust(5)) # noqa: G003 - -# dictionary to pass to build hit -out_dict = { - "pars": { - "operations": { - "daqenergy_cal": { - "expression": "daqenergy*a", - "parameters": {"a": round(roughpars[0], 5)}, - } - } - } -} - -# plot to check that the calibration is correct with zoom on 2.6 peak -fig = plt.figure(figsize=(8, 10)) -ax = plt.subplot(211) -ax.hist(E_uncal * roughpars[0], bins=np.arange(0, 3000, 1), histtype="step") -ax.set_ylabel("counts") -ax.set_yscale("log") -ax2 = plt.subplot(212) -ax2.hist( - E_uncal * roughpars[0], - bins=np.arange(2600, 2630, 1 * roughpars[0]), - histtype="step", -) -ax2.set_xlabel("energy (keV)") -ax2.set_ylabel("counts") -plt.suptitle(args.channel) -with Path(args.plot_file).open("wb") as w: - pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) -plt.close() - -Props.write_to_file(args.blind_curve, out_dict) diff --git a/workflow/src/legenddataflow/scripts/build_dsp.py
b/workflow/src/legenddataflow/scripts/build_dsp.py deleted file mode 100644 index 7e44bb6..0000000 --- a/workflow/src/legenddataflow/scripts/build_dsp.py +++ /dev/null @@ -1,167 +0,0 @@ -import argparse -import re -import time -import warnings -from pathlib import Path - -import numpy as np -from dbetto import TextDB -from dbetto.catalog import Props -from dspeed import build_dsp -from legendmeta import LegendMetadata -from lgdo import lh5 - -from ..log import build_log - - -def replace_list_with_array(dic): - for key, value in dic.items(): - if isinstance(value, dict): - dic[key] = replace_list_with_array(value) - elif isinstance(value, list): - dic[key] = np.array(value, dtype="float32") - else: - pass - return dic - - -warnings.filterwarnings(action="ignore", category=RuntimeWarning) - -argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs path", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--tier", help="Tier", type=str, required=True) - -argparser.add_argument( - "--pars_file", help="database file for detector", nargs="*", default=[] -) -argparser.add_argument("--input", help="input file", type=str) - -argparser.add_argument("--output", help="output file", type=str) -argparser.add_argument("--db_file", help="db file", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] -if args.tier in ["dsp", "psp"]: - config_dict = config_dict["tier_dsp"] -elif args.tier in ["ann", "pan"]: - config_dict = config_dict["tier_ann"] -else: - msg = f"Tier {args.tier} not supported" - raise ValueError(msg) - -log = build_log(config_dict, args.log) - -channel_dict = config_dict["inputs"]["processing_chain"] -settings_dict = config_dict["options"].get("settings", {}) -if isinstance(settings_dict, str): - settings_dict = Props.read_from(settings_dict) - -meta = LegendMetadata(path=args.metadata) -chan_map = meta.channelmap(args.timestamp, system=args.datatype) - -if args.tier in ["ann", "pan"]: - channel_dict = { - f"ch{chan_map[chan].daq.rawid:07}/dsp": Props.read_from(file) - for chan, file in channel_dict.items() - } -else: - channel_dict = { - f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) - for chan, file in channel_dict.items() - } -db_files = [ - par_file - for par_file in args.pars_file - if Path(par_file).suffix in (".json", ".yaml", ".yml") -] - -database_dic = Props.read_from(db_files, subst_pathvar=True) -database_dic = replace_list_with_array(database_dic) - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -rng = np.random.default_rng() -rand_num = f"{rng.integers(0, 99999):05d}" -temp_output = f"{args.output}.{rand_num}" - -start = time.time() - -build_dsp( - args.input, - temp_output, - {}, - database=database_dic, - chan_config=channel_dict, - write_mode="r", - buffer_len=settings_dict.get("buffer_len", 1000), - block_width=settings_dict.get("block_width", 16), -) - -log.info(f"build_dsp finished in {time.time()-start}") -Path(temp_output).rename(args.output) - -key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") - -if args.tier in ["dsp", "psp"]: - raw_channels = [ - channel for channel in 
lh5.ls(args.input) if re.match("(ch\\d{7})", channel) - ] - raw_fields = [ - field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/") - ] - - outputs = {} - channels = [] - for channel, chan_dict in channel_dict.items(): - output = chan_dict["outputs"] - in_dict = False - for entry in outputs: - if outputs[entry]["fields"] == output: - outputs[entry]["channels"].append(channel.split("/")[0]) - in_dict = True - if in_dict is False: - outputs[f"group{len(list(outputs))+1}"] = { - "channels": [channel.split("/")[0]], - "fields": output, - } - channels.append(channel.split("/")[0]) - - full_dict = { - "valid_fields": { - "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, - "dsp": outputs, - }, - "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, - } -else: - outputs = {} - channels = [] - for channel, chan_dict in channel_dict.items(): - output = chan_dict["outputs"] - in_dict = False - for entry in outputs: - if outputs[entry]["fields"] == output: - outputs[entry]["channels"].append(channel.split("/")[0]) - in_dict = True - if in_dict is False: - outputs[f"group{len(list(outputs))+1}"] = { - "channels": [channel.split("/")[0]], - "fields": output, - } - channels.append(channel.split("/")[0]) - - full_dict = { - "valid_fields": { - "ann": outputs, - }, - "valid_keys": {key: {"valid_channels": {"ann": channels}}}, - } - -Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.db_file, full_dict) diff --git a/workflow/src/legenddataflow/scripts/build_evt.py b/workflow/src/legenddataflow/scripts/build_evt.py deleted file mode 100644 index b4723b4..0000000 --- a/workflow/src/legenddataflow/scripts/build_evt.py +++ /dev/null @@ -1,182 +0,0 @@ -import argparse -import json -import time -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -from dbetto import Props, TextDB -from legendmeta import LegendMetadata -from lgdo.types import Array -from pygama.evt import build_evt - -from ..log import build_log - -sto = lh5.LH5Store() - - -def find_matching_values_with_delay(arr1, arr2, jit_delay): - matching_values = [] - - # Create an array with all possible delay values - delays = np.arange(0, int(1e9 * jit_delay)) * jit_delay - - for delay in delays: - arr2_delayed = arr2 + delay - - # Find matching values and indices - mask = np.isin(arr1, arr2_delayed, assume_unique=True) - matching_values.extend(arr1[mask]) - - return np.unique(matching_values) - - -argparser = argparse.ArgumentParser() -argparser.add_argument("--hit_file", help="hit file", type=str) -argparser.add_argument("--dsp_file", help="dsp file", type=str) -argparser.add_argument("--tcm_file", help="tcm file", type=str) -argparser.add_argument("--ann_file", help="ann file") -argparser.add_argument("--xtc_file", help="xtc file", type=str) -argparser.add_argument("--par_files", help="par files", nargs="*") - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--tier", help="Tier", type=str, required=True) - -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata path", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - -argparser.add_argument("--output", help="output file", type=str) -args = argparser.parse_args() - -# load in config -configs = TextDB(args.configs, lazy=True) -if args.tier 
in ("evt", "pet"): - rule_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "tier_evt" - ] - -else: - msg = "unknown tier" - raise ValueError(msg) - -config_dict = rule_dict["inputs"] -evt_config_file = config_dict["evt_config"] - -log = build_log(rule_dict, args.log) - -meta = LegendMetadata(args.metadata, lazy=True) -chmap = meta.channelmap(args.timestamp) - -evt_config = Props.read_from(evt_config_file) - -if args.datatype in ("phy", "xtc"): - exp_string = evt_config["operations"]["geds___energy"]["expression"] - exp_string = exp_string.replace( - 'xtalk_matrix_filename=""', f'xtalk_matrix_filename="{args.xtc_file}"' - ) - exp_string = exp_string.replace( - 'cal_par_files=""', f"cal_par_files={args.par_files}" - ) - exp_string2 = exp_string.replace('return_mode="energy"', 'return_mode="tcm_index"') - - file_path_config = { - "operations": { - "geds___energy": {"expression": exp_string}, - "_geds___tcm_idx": {"expression": exp_string2}, - } - } - - log.debug(json.dumps(file_path_config, indent=2)) - - Props.add_to(evt_config, file_path_config) - -# block for snakemake to fill in channel lists -for field, dic in evt_config["channels"].items(): - if isinstance(dic, dict): - chans = chmap.map("system", unique=False)[dic["system"]] - if "selectors" in dic: - try: - for k, val in dic["selectors"].items(): - chans = chans.map(k, unique=False)[val] - except KeyError: - chans = None - if chans is not None: - chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] - else: - chans = [] - evt_config["channels"][field] = chans - -log.debug(json.dumps(evt_config["channels"], indent=2)) - -t_start = time.time() -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -file_table = { - "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), - "dsp": (args.dsp_file, "dsp", "ch{}"), - "hit": (args.hit_file, "hit", "ch{}"), - "evt": (None, "evt"), -} - -if args.ann_file is not None: - file_table["ann"] = (args.ann_file, "dsp", "ch{}") - -table = build_evt( - file_table, - evt_config, -) - -if "muon_config" in config_dict and config_dict["muon_config"] is not None: - muon_config = Props.read_from(config_dict["muon_config"]["evt_config"]) - field_config = Props.read_from(config_dict["muon_config"]["field_config"]) - # block for snakemake to fill in channel lists - for field, dic in muon_config["channels"].items(): - if isinstance(dic, dict): - chans = chmap.map("system", unique=False)[dic["system"]] - if "selectors" in dic: - try: - for k, val in dic["selectors"].items(): - chans = chans.map(k, unique=False)[val] - except KeyError: - chans = None - if chans is not None: - chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] - else: - chans = [] - muon_config["channels"][field] = chans - - trigger_timestamp = table[field_config["ged_timestamp"]["table"]][ - field_config["ged_timestamp"]["field"] - ].nda - if "hardware_tcm_2" in lh5.ls(args.tcm_file): - muon_table = build_evt( - { - "tcm": (args.tcm_file, "hardware_tcm_2", "ch{}"), - "dsp": (args.dsp_file, "dsp", "ch{}"), - "hit": (args.hit_file, "hit", "ch{}"), - "evt": (None, "evt"), - }, - muon_config, - ) - - muon_timestamp = muon_table[field_config["muon_timestamp"]["field"]].nda - muon_tbl_flag = muon_table[field_config["muon_flag"]["field"]].nda - if len(muon_timestamp[muon_tbl_flag]) > 0: - is_muon_veto_triggered = find_matching_values_with_delay( - trigger_timestamp, muon_timestamp[muon_tbl_flag], field_config["jitter"] - ) - muon_flag = np.isin(trigger_timestamp, is_muon_veto_triggered) - else: - 
muon_flag = np.zeros(len(trigger_timestamp), dtype=bool) - else: - muon_flag = np.zeros(len(trigger_timestamp), dtype=bool) - table[field_config["output_field"]["table"]].add_column( - field_config["output_field"]["field"], Array(muon_flag) - ) - -sto.write(obj=table, name="evt", lh5_file=args.output, wo_mode="a") - -t_elap = time.time() - t_start -log.info(f"Done! Time elapsed: {t_elap:.2f} sec.") diff --git a/workflow/src/legenddataflow/scripts/build_fdb.py b/workflow/src/legenddataflow/scripts/build_fdb.py deleted file mode 100644 index 93a3567..0000000 --- a/workflow/src/legenddataflow/scripts/build_fdb.py +++ /dev/null @@ -1,84 +0,0 @@ -import argparse -import logging -from pathlib import Path - -import numpy as np -from dbetto.catalog import Props -from lgdo import lh5 -from pygama.flow.file_db import FileDB - -argparser = argparse.ArgumentParser() -argparser.add_argument("--config", required=True) -argparser.add_argument("--scan-path", required=True) -argparser.add_argument("--output", required=True) -argparser.add_argument("--log") -argparser.add_argument("--assume-nonsparse", action="store_true") -args = argparser.parse_args() - -config = Props.read_from(args.config) - -if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -else: - logging.basicConfig(level=logging.DEBUG) - -logging.getLogger("legendmeta").setLevel(logging.INFO) -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - -fdb = FileDB(config, scan=False) -fdb.scan_files([args.scan_path]) -fdb.scan_tables_columns(dir_files_conform=True) - -# augment dataframe with earliest timestamp found in file - -default = np.finfo("float64").max -timestamps = np.zeros(len(fdb.df), dtype="float64") - -for i, row in enumerate(fdb.df.itertuples()): - store = lh5.LH5Store( - base_path=f"{fdb.data_dir}/{fdb.tier_dirs['raw']}", keep_open=True - ) - - # list of first timestamps for each channel - loc_timestamps = np.full(len(row.raw_tables), fill_value=default, dtype="float64") - - msg = f"finding first timestamp in {fdb.data_dir}/{fdb.tier_dirs['raw']}/{row.raw_file}" - log.info(msg) - - found = False - for j, table in enumerate(row.raw_tables): - try: - loc_timestamps[j] = store.read( - fdb.table_format["raw"].format(ch=table) + "/timestamp", - row.raw_file.strip("/"), - n_rows=1, - )[0][0] - found = True - except KeyError: - pass - - if found and args.assume_nonsparse: - break - - if (loc_timestamps == default).all() or not found: - msg = "something went wrong! no valid first timestamp found" - raise RuntimeError(msg) - - timestamps[i] = np.min(loc_timestamps) - - msg = f"found {timestamps[i]}" - log.info(msg) - - if timestamps[i] < 0 or timestamps[i] > 4102444800: - msg = "something went wrong! 
timestamp does not make sense" - raise RuntimeError(msg) - -fdb.df["first_timestamp"] = timestamps - -fdb.to_disk(args.output, wo_mode="of") diff --git a/workflow/src/legenddataflow/scripts/build_hit.py b/workflow/src/legenddataflow/scripts/build_hit.py deleted file mode 100644 index 47b0fa0..0000000 --- a/workflow/src/legenddataflow/scripts/build_hit.py +++ /dev/null @@ -1,96 +0,0 @@ -import argparse -import time -from pathlib import Path - -from dbetto.catalog import Props -from legendmeta import LegendMetadata, TextDB -from lgdo import lh5 -from pygama.hit.build_hit import build_hit - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--input", help="input file", type=str) -argparser.add_argument("--pars_file", help="hit pars file", nargs="*") - -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--tier", help="Tier", type=str, required=True) - -argparser.add_argument("--output", help="output file", type=str) -argparser.add_argument("--db_file", help="db file", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True) -if args.tier == "hit" or args.tier == "pht": - config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "tier_hit" - ] -else: - msg = "unknown tier" - raise ValueError(msg) - -log = build_log(config_dict, args.log) - -channel_dict = config_dict["inputs"]["hit_config"] -settings_dict = config_dict["options"].get("settings", {}) -if isinstance(settings_dict, str): - settings_dict = Props.read_from(settings_dict) - -meta = LegendMetadata(path=args.metadata) -chan_map = meta.channelmap(args.timestamp, system=args.datatype) - -pars_dict = Props.read_from(args.pars_file) -pars_dict = {chan: chan_dict["pars"] for chan, chan_dict in pars_dict.items()} - -hit_dict = {} -channels_present = lh5.ls(args.input) -for channel in pars_dict: - chan_pars = pars_dict[channel].copy() - try: - detector = chan_map.map("daq.rawid")[int(channel[2:])].name - if detector in channel_dict: - cfg_dict = Props.read_from(channel_dict[detector]) - Props.add_to(cfg_dict, chan_pars) - chan_pars = cfg_dict - - if channel in channels_present: - hit_dict[f"{channel}/dsp"] = chan_pars - except KeyError: - pass - -t_start = time.time() -Path(args.output).parent.mkdir(parents=True, exist_ok=True) -build_hit(args.input, lh5_tables_config=hit_dict, outfile=args.output) -t_elap = time.time() - t_start -log.info(f"Done! 
Time elapsed: {t_elap:.2f} sec.") - -hit_outputs = {} -hit_channels = [] -for channel, file in channel_dict.items(): - output = Props.read_from(file)["outputs"] - in_dict = False - for entry in hit_outputs: - if hit_outputs[entry]["fields"] == output: - hit_outputs[entry]["channels"].append(channel) - in_dict = True - if in_dict is False: - hit_outputs[f"group{len(list(hit_outputs))+1}"] = { - "channels": [channel], - "fields": output, - } - hit_channels.append(channel) - -key = args.output.replace(f"-tier_{args.tier}.lh5", "") - -full_dict = { - "valid_fields": {args.tier: hit_outputs}, - "valid_keys": {key: {"valid_channels": {args.tier: hit_channels}}}, -} - -Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.db_file, full_dict) diff --git a/workflow/src/legenddataflow/scripts/build_raw_blind.py b/workflow/src/legenddataflow/scripts/build_raw_blind.py deleted file mode 100644 index 3d42717..0000000 --- a/workflow/src/legenddataflow/scripts/build_raw_blind.py +++ /dev/null @@ -1,181 +0,0 @@ -""" -This script takes in raw data, applies the calibration to the daqenergy -and uses this to blind the data in a window of Qbb +- 25 keV. It copies over all -channels in a raw file, removing those events that fall within the ROI for Ge detectors -that have a daqenergy calibration curve and are not anti-coincidence only (AC). It removes -the whole event from all of the Ge and SiPM channels. - -In the Snakemake dataflow, this script only runs if the checkfile is found on disk, -but this is controlled by the Snakemake flow (presumably an error is thrown if the file -is not found). This script itself does not check for the existence of such a file. -""" - -import argparse -from pathlib import Path - -import numexpr as ne -import numpy as np -from dbetto.catalog import Props -from legendmeta import LegendMetadata, TextDB -from lgdo import lh5 - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--input", help="input file", type=str) -argparser.add_argument("--output", help="output file", type=str) -argparser.add_argument( - "--blind_curve", help="blinding curves file", type=str, required=True, nargs="*" -) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--configs", help="config file", type=str) -argparser.add_argument("--chan_maps", help="chan map", type=str) -argparser.add_argument("--metadata", help="metadata", type=str) -argparser.add_argument("--log", help="log file", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "tier_raw" -] - -log = build_log(config_dict, args.log) - -channel_dict = config_dict["inputs"] -hdf_settings = Props.read_from(config_dict["settings"])["hdf5_settings"] -blinding_settings = Props.read_from(config_dict["config"]) - -centroid = blinding_settings["centroid_in_keV"] # keV -width = blinding_settings["width_in_keV"] # keV - -# list of all channels and objects in the raw file -all_channels = lh5.ls(args.input) - -# list of Ge channels and SiPM channels with associated metadata -legendmetadata = LegendMetadata(args.metadata, lazy=True) -ged_channels = ( - legendmetadata.channelmap(args.timestamp) - .map("system", unique=False)["geds"] - .map("daq.rawid") -) -spms_channels = ( - legendmetadata.channelmap(args.timestamp) - .map("system", 
unique=False)["spms"] - .map("daq.rawid") -) -auxs_channels = ( - legendmetadata.channelmap(args.timestamp) - .map("system", unique=False)["auxs"] - .map("daq.rawid") -) -blsn_channels = ( - legendmetadata.channelmap(args.timestamp) - .map("system", unique=False)["bsln"] - .map("daq.rawid") -) -puls_channels = ( - legendmetadata.channelmap(args.timestamp) - .map("system", unique=False)["puls"] - .map("daq.rawid") -) - -store = lh5.LH5Store() - -# rows that need blinding -toblind = np.array([]) - -# first, loop through the Ge detector channels, calibrate them and look for events that should be blinded -for chnum in list(ged_channels): - # skip Ge detectors that are anti-coincidence only or not able to be blinded for some other reason - if ged_channels[chnum]["analysis"]["is_blinded"] is False: - continue - - # load in just the daqenergy for now - daqenergy, _ = store.read(f"ch{chnum}/raw/daqenergy", args.input) - - # read in calibration curve for this channel - blind_curve = Props.read_from(args.blind_curve)[f"ch{chnum}"]["pars"]["operations"] - - # calibrate daq energy using pre existing curve - daqenergy_cal = ne.evaluate( - blind_curve["daqenergy_cal"]["expression"], - local_dict=dict( - daqenergy=daqenergy, **blind_curve["daqenergy_cal"]["parameters"] - ), - ) - - # figure out which event indices should be blinded - toblind = np.append( - toblind, np.nonzero(np.abs(np.asarray(daqenergy_cal) - centroid) <= width)[0] - ) - -# remove duplicates -toblind = np.unique(toblind) - -# total number of events (from last Ge channel loaded, should be same for all Ge channels) -allind = np.arange(len(daqenergy)) - -# gets events that should not be blinded -tokeep = allind[np.logical_not(np.isin(allind, toblind))] - -# make some temp file to write the output to before renaming it -rng = np.random.default_rng() -rand_num = f"{rng.integers(0,99999):05d}" -temp_output = f"{args.output}.{rand_num}" -Path(temp_output).parent.mkdir(parents=True, exist_ok=True) - -for channel in all_channels: - try: - chnum = int(channel[2::]) - except ValueError: - # if this isn't an interesting channel, just copy it to the output file - chobj, _ = store.read(channel, args.input, decompress=False) - store.write_object( - chobj, - channel, - lh5_file=temp_output, - wo_mode="w", - **hdf_settings, - ) - continue - - if ( - (chnum not in list(ged_channels)) - and (chnum not in list(spms_channels)) - and (chnum not in list(auxs_channels)) - and (chnum not in list(blsn_channels)) - and (chnum not in list(puls_channels)) - ): - # if this is a PMT or not included for some reason, just copy it to the output file - chobj, _ = store.read(channel + "/raw", args.input, decompress=False) - store.write_object( - chobj, - group=channel, - name="raw", - lh5_file=temp_output, - wo_mode="w", - **hdf_settings, - ) - continue - - # the rest should be the Ge and SiPM channels that need to be blinded - - # read in all of the data but only for the unblinded events - blinded_chobj, _ = store.read( - channel + "/raw", args.input, idx=tokeep, decompress=False - ) - - # now write the blinded data for this channel - store.write_object( - blinded_chobj, - group=channel, - name="raw", - lh5_file=temp_output, - wo_mode="w", - **hdf_settings, - ) - -# rename the temp file -Path(args.output).parent.mkdir(parents=True, exist_ok=True) -Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/build_raw_fcio.py b/workflow/src/legenddataflow/scripts/build_raw_fcio.py deleted file mode 100644 index 176565a..0000000 --- 
a/workflow/src/legenddataflow/scripts/build_raw_fcio.py +++ /dev/null @@ -1,68 +0,0 @@ -import argparse -from copy import deepcopy -from pathlib import Path - -import numpy as np -from daq2lh5 import build_raw -from dbetto import TextDB -from dbetto.catalog import Props - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("input", help="input file", type=str) -argparser.add_argument("output", help="output file", type=str) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--configs", help="config file", type=str) -argparser.add_argument("--chan_maps", help="chan map", type=str) -argparser.add_argument("--log", help="log file", type=str) -args = argparser.parse_args() - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -config_dict = ( - TextDB(args.configs, lazy=True) - .on(args.timestamp, system=args.datatype) - .snakemake_rules.tier_raw_fcio -) - -log = build_log(config_dict, args.log) - -channel_dict = config_dict.inputs -settings = Props.read_from(channel_dict.settings) -channel_dict = channel_dict.out_spec -all_config = Props.read_from(channel_dict.gen_config) - -chmap = TextDB(args.chan_maps, lazy=True).channelmaps.on(args.timestamp).group("system") - -if "geds_config" in channel_dict: - raise NotImplementedError() - -if "spms_config" in channel_dict: - spm_config = Props.read_from(channel_dict.spms_config) - spm_channels = chmap.spms.map("daq.rawid") - - for rawid, chinfo in spm_channels.items(): - cfg_block = deepcopy(spm_config["FCEventDecoder"]["__output_table_name__"]) - cfg_block["key_list"] = [chinfo.daq.fc_channel] - spm_config["FCEventDecoder"][f"ch{rawid:07d}/raw"] = cfg_block - - spm_config["FCEventDecoder"].pop("__output_table_name__") - - Props.add_to(all_config, spm_config) - -if "auxs_config" in channel_dict: - raise NotImplementedError() - -if "muon_config" in channel_dict: - raise NotImplementedError() - -rng = np.random.default_rng() -rand_num = f"{rng.integers(0,99999):05d}" -temp_output = f"{args.output}.{rand_num}" - -build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) - -# rename the temp file -Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/build_raw_orca.py b/workflow/src/legenddataflow/scripts/build_raw_orca.py deleted file mode 100644 index 72b5ac6..0000000 --- a/workflow/src/legenddataflow/scripts/build_raw_orca.py +++ /dev/null @@ -1,108 +0,0 @@ -import argparse -import logging -from pathlib import Path - -import numpy as np -from daq2lh5 import build_raw -from dbetto import TextDB -from dbetto.catalog import Props - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("input", help="input file", type=str) -argparser.add_argument("output", help="output file", type=str) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--configs", help="config file", type=str) -argparser.add_argument("--chan_maps", help="chan map", type=str) -argparser.add_argument("--log", help="log file") -args = argparser.parse_args() - -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -configs = TextDB(args.configs, 
lazy=True) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "tier_raw" -] - -log = build_log(config_dict, args.log) - -channel_dict = config_dict["inputs"] -settings = Props.read_from(channel_dict["settings"]) -channel_dict = channel_dict["out_spec"] -all_config = Props.read_from(channel_dict["gen_config"]) - -chmap = TextDB(args.chan_maps, lazy=True) - -if "geds_config" in list(channel_dict): - ged_config = Props.read_from(channel_dict["geds_config"]) - - ged_channels = list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["geds"] - .map("daq.rawid") - ) - - ged_config[next(iter(ged_config))]["geds"]["key_list"] = sorted(ged_channels) - Props.add_to(all_config, ged_config) - -if "spms_config" in list(channel_dict): - spm_config = Props.read_from(channel_dict["spms_config"]) - - spm_channels = list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["spms"] - .map("daq.rawid") - ) - - spm_config[next(iter(spm_config))]["spms"]["key_list"] = sorted(spm_channels) - Props.add_to(all_config, spm_config) - -if "auxs_config" in list(channel_dict): - aux_config = Props.read_from(channel_dict["auxs_config"]) - aux_channels = list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["auxs"] - .map("daq.rawid") - ) - aux_channels += list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["puls"] - .map("daq.rawid") - ) - aux_channels += list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["bsln"] - .map("daq.rawid") - ) - top_key = next(iter(aux_config)) - aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted( - aux_channels - ) - Props.add_to(all_config, aux_config) - -if "muon_config" in list(channel_dict): - muon_config = Props.read_from(channel_dict["muon_config"]) - muon_channels = list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["muon"] - .map("daq.rawid") - ) - top_key = next(iter(muon_config)) - muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted( - muon_channels - ) - Props.add_to(all_config, muon_config) - -rng = np.random.default_rng() -rand_num = f"{rng.integers(0,99999):05d}" -temp_output = f"{args.output}.{rand_num}" - -build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) - -# rename the temp file -Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/build_skm.py b/workflow/src/legenddataflow/scripts/build_skm.py deleted file mode 100644 index 9411b1b..0000000 --- a/workflow/src/legenddataflow/scripts/build_skm.py +++ /dev/null @@ -1,96 +0,0 @@ -import argparse - -import awkward as ak -from dbetto import TextDB -from dbetto.catalog import Props -from lgdo import lh5 -from lgdo.types import Array, Struct, Table, VectorOfVectors - -from ..log import build_log - - -def get_all_out_fields(input_table, out_fields, current_field=""): - for key in input_table: - field = input_table[key] - key_string = f"{current_field}.{key}" - if isinstance(field, (Table, Struct)): - get_all_out_fields(field, out_fields, key_string) - else: - if key_string not in out_fields: - out_fields.append(key_string) - return out_fields - - -argparser = argparse.ArgumentParser() -argparser.add_argument("--evt_file", help="evt file", required=True) -argparser.add_argument("--configs", help="configs", required=True) -argparser.add_argument("--datatype", help="datatype", required=True) -argparser.add_argument("--timestamp", help="timestamp", required=True) 
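For orientation, the skimming step further down in this (now removed) script evaluates a config-supplied filter expression against the evt tier loaded as an awkward array, then keeps only the configured columns in a rectangular table. A minimal sketch of that pattern; the filter string and file path are invented here purely for illustration, only the calls mirror the script below:

from lgdo import lh5
from lgdo.types import Table

evt_filter = "evt.geds.multiplicity == 1"               # assumed example expression
evt = lh5.read_as("evt", "l200-example-evt.lh5", "ak")  # placeholder file name
skm = eval(f"evt[{evt_filter}]")                        # drop events failing the filter
out_table = Table(skm)                                  # rectangular LGDO table for the skm tier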
-argparser.add_argument("--log", help="log file", default=None) -argparser.add_argument("--output", help="output file", required=True) -args = argparser.parse_args() - -# load in config -config_dict = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype)[ - "snakemake_rules" -]["tier_skm"] - -log = build_log(config_dict, args.log) - - -skm_config_file = config_dict["inputs"]["skm_config"] -evt_filter = Props.read_from(skm_config_file)["evt_filter"] -out_fields = Props.read_from(skm_config_file)["keep_fields"] - -store = lh5.LH5Store() - -evt = lh5.read_as("evt", args.evt_file, "ak") - -# remove unwanted events -skm = eval(f"evt[{evt_filter}]") -# make it rectangular and make an LGDO Table -out_table = Table(skm) - -for field in out_fields: - items = field.split(".") - ptr1 = out_table - for item in items[:-1]: - ptr1 = ptr1[item] - - if isinstance(ptr1[items[-1]], Table): - out_fields.remove(field) - out_fields = get_all_out_fields( - ptr1[items[-1]], out_fields, current_field=field - ) - -# remove unwanted columns -out_table_skm = Table(size=len(out_table)) -for field in out_fields: - # table nesting is labeled by '.' in the config - items = field.split(".") - # get to actual nested field recursively - ptr1 = out_table - ptr2 = out_table_skm - for item in items[:-1]: - # make intermediate tables in new table - if item not in ptr2: - ptr2.add_field(item, Table(size=len(out_table))) - # get non-table LGDO recursively - ptr1 = ptr1[item] - ptr2 = ptr2[item] - - # finally add column to new table - if isinstance(ptr1[items[-1]], VectorOfVectors): - ptr2.add_field(items[-1], Array(ak.flatten(ptr1[items[-1]].view_as("ak")))) - else: - ptr2.add_field(items[-1], ptr1[items[-1]]) - attrs = ptr1[items[-1]].attrs - - # forward LGDO attributes - # attrs = evt[field.replace(".", "_")].attrs - for attr, val in attrs.items(): - if attr != "datatype": - ptr2.attrs[attr] = val - -# write-append to disk -store.write(out_table_skm, "skm", args.output, wo_mode="w") diff --git a/workflow/src/legenddataflow/scripts/build_tcm.py b/workflow/src/legenddataflow/scripts/build_tcm.py deleted file mode 100644 index 7e6ab73..0000000 --- a/workflow/src/legenddataflow/scripts/build_tcm.py +++ /dev/null @@ -1,53 +0,0 @@ -import argparse -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -from daq2lh5.orca import orca_flashcam -from dbetto import TextDB -from dbetto.catalog import Props -from pygama.evt.build_tcm import build_tcm - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("input", help="input file", type=str) -argparser.add_argument("output", help="output file", type=str) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--configs", help="config file", type=str) -argparser.add_argument("--log", help="log file", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["tier_tcm"] - -log = build_log(config_dict, args.log) - -settings = Props.read_from(config_dict["inputs"]["config"]) - -rng = np.random.default_rng() -temp_output = f"{args.output}.{rng.integers(0, 99999):05d}" -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -# get the list of channels by fcid -ch_list = lh5.ls(args.input, "/ch*") -fcid_channels = {} -for ch in ch_list: - key = int(ch[2:]) - fcid = 
orca_flashcam.get_fcid(key) - if fcid not in fcid_channels: - fcid_channels[fcid] = [] - fcid_channels[fcid].append(f"/{ch}/raw") - -# make a hardware_tcm_[fcid] for each fcid -for fcid, fcid_dict in fcid_channels.items(): - build_tcm( - [(args.input, fcid_dict)], - out_file=temp_output, - out_name=f"hardware_tcm_{fcid}", - wo_mode="o", - **settings, - ) - -Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/check_blinding.py b/workflow/src/legenddataflow/scripts/check_blinding.py deleted file mode 100644 index faf800d..0000000 --- a/workflow/src/legenddataflow/scripts/check_blinding.py +++ /dev/null @@ -1,108 +0,0 @@ -""" -This script checks that the blinding for a particular channel is still valid, -it does this by taking the calibration curve stored in the overrides, applying it -to the daqenergy, running a peak search over the calibrated energy and checking that -there are peaks within 5keV of the 583 and 2614 peaks. If the detector is in ac mode -then it will skip the check. -""" - -import argparse -import pickle as pkl -from pathlib import Path - -import matplotlib as mpl -import matplotlib.pyplot as plt -import numexpr as ne -import numpy as np -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from lgdo import lh5 -from pygama.math.histogram import get_hist -from pygama.pargen.energy_cal import get_i_local_maxima - -from ..log import build_log - -mpl.use("Agg") - -argparser = argparse.ArgumentParser() -argparser.add_argument("--files", help="files", nargs="*", type=str) -argparser.add_argument("--output", help="output file", type=str) -argparser.add_argument("--plot_file", help="plot file", type=str) -argparser.add_argument( - "--blind_curve", help="blinding curves file", nargs="*", type=str -) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--configs", help="config file", type=str) -argparser.add_argument("--channel", help="channel", type=str) -argparser.add_argument("--metadata", help="channel", type=str) -argparser.add_argument("--log", help="log file", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["tier_raw_blindcheck"] - -log = build_log(config_dict, args.log) - -# get the usability status for this channel -chmap = ( - LegendMetadata(args.metadata, lazy=True).channelmap(args.timestamp).map("daq.rawid") -) -det_status = chmap[int(args.channel[2:])]["analysis"]["is_blinded"] - -# read in calibration curve for this channel -blind_curve = Props.read_from(args.blind_curve)[args.channel]["pars"]["operations"] - -# load in the data -daqenergy = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[0].view_as( - "np" -) - -# calibrate daq energy using pre existing curve -daqenergy_cal = ne.evaluate( - blind_curve["daqenergy_cal"]["expression"], - local_dict=dict(daqenergy=daqenergy, **blind_curve["daqenergy_cal"]["parameters"]), -) - -# bin with 1 keV bins and get maxs -hist, bins, var = get_hist(daqenergy_cal, np.arange(0, 3000, 1)) -maxs = get_i_local_maxima(hist, delta=25) -log.info(f"peaks found at : {maxs}") - -# plot the energy spectrum to check calibration -fig = plt.figure(figsize=(8, 10)) -ax = plt.subplot(211) -ax.hist(daqenergy_cal, bins=np.arange(0, 3000, 1), histtype="step") -ax.set_ylabel("counts") -ax.set_yscale("log") 
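The pass/fail criterion applied at the end of this script boils down to: histogram the calibrated daqenergy in 1 keV bins, locate local maxima, and require a maximum within 5 keV of both the 583 keV and 2614 keV Tl-208 lines (detectors flagged as not blinded pass automatically). A condensed, standalone sketch using the same pygama helpers as in this script:

import numpy as np
from pygama.math.histogram import get_hist
from pygama.pargen.energy_cal import get_i_local_maxima

def blinding_still_valid(daqenergy_cal, is_blinded=True):
    # 1 keV binning, so the indices of the maxima are energies in keV
    hist, _, _ = get_hist(daqenergy_cal, np.arange(0, 3000, 1))
    maxs = get_i_local_maxima(hist, delta=25)
    peaks_ok = np.any(np.abs(maxs - 583) < 5) and np.any(np.abs(maxs - 2614) < 5)
    return peaks_ok or is_blinded is False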
-ax2 = plt.subplot(212) -ax2.hist( - daqenergy_cal, - bins=np.arange(2600, 2630, 1 * blind_curve["daqenergy_cal"]["parameters"]["a"]), - histtype="step", -) -ax2.set_xlabel("energy (keV)") -ax2.set_ylabel("counts") -plt.suptitle(args.channel) -with Path(args.plot_file).open("wb") as w: - pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) -plt.close() - -# check for peaks within +- 5keV of 2614 and 583 to ensure blinding still -# valid and if so create file else raise error. if detector is in ac mode it -# will always pass this check -if ( - np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) -) or det_status is False: - Path(args.output).mkdir(parents=True, exist_ok=True) - Props.write_to( - args.output, - { - "threshold_adc": np.nanmin(daqenergy), - "threshold_kev": np.nanmin(daqenergy_cal), - }, - ) -else: - msg = "peaks not found in daqenergy" - raise RuntimeError(msg) diff --git a/workflow/src/legenddataflow/scripts/complete_run.py b/workflow/src/legenddataflow/scripts/complete_run.py index e3892eb..eff7a90 100644 --- a/workflow/src/legenddataflow/scripts/complete_run.py +++ b/workflow/src/legenddataflow/scripts/complete_run.py @@ -7,9 +7,9 @@ import time from pathlib import Path +from .. import patterns +from .. import utils as ut from ..FileKey import FileKey -from . import patterns -from . import utils as ut print("INFO: dataflow ran successfully, now few final checks and scripts") diff --git a/workflow/src/legenddataflow/scripts/create_chankeylist.py b/workflow/src/legenddataflow/scripts/create_chankeylist.py index a75be8b..9566068 100644 --- a/workflow/src/legenddataflow/scripts/create_chankeylist.py +++ b/workflow/src/legenddataflow/scripts/create_chankeylist.py @@ -4,27 +4,29 @@ from dbetto import TextDB from legendmeta import LegendMetadata -argparser = argparse.ArgumentParser() -argparser.add_argument("--det_status", help="det_status", type=str, required=True) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channelmap", help="Channel Map", type=str, required=True) -argparser.add_argument("--output_file", help="output_file", type=str, required=True) -args = argparser.parse_args() +def create_chankeylist() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--det_status", help="det_status", type=str, required=True) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channelmap", help="Channel Map", type=str, required=True) -det_status = TextDB(args.det_status, lazy=True) -status_map = det_status.statuses.on(args.timestamp, system=args.datatype) + argparser.add_argument("--output_file", help="output_file", type=str, required=True) + args = argparser.parse_args() -channel_map = LegendMetadata(args.channelmap, lazy=True) -chmap = channel_map.channelmaps.on(args.timestamp) + det_status = TextDB(args.det_status, lazy=True) + status_map = det_status.statuses.on(args.timestamp, system=args.datatype) -channels = [ - chan - for chan in status_map - if status_map[chan]["processable"] is True and chmap[chan].system == "geds" -] -Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) -with Path(args.output_file).open("w") as f: - for chan in channels: - f.write(f"{chan}\n") + channel_map = LegendMetadata(args.channelmap, lazy=True) + chmap = 
channel_map.channelmaps.on(args.timestamp) + + channels = [ + chan + for chan in status_map + if status_map[chan]["processable"] is True and chmap[chan].system == "geds" + ] + Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.output_file).open("w") as f: + for chan in channels: + f.write(f"{chan}\n") diff --git a/workflow/src/legenddataflow/scripts/filedb.py b/workflow/src/legenddataflow/scripts/filedb.py new file mode 100644 index 0000000..d9b52d8 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/filedb.py @@ -0,0 +1,88 @@ +import argparse +import logging +from pathlib import Path + +import numpy as np +from dbetto.catalog import Props +from lgdo import lh5 +from pygama.flow.file_db import FileDB + + +def build_filedb() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--config", required=True) + argparser.add_argument("--scan-path", required=True) + argparser.add_argument("--output", required=True) + argparser.add_argument("--log") + argparser.add_argument("--assume-nonsparse", action="store_true") + args = argparser.parse_args() + + config = Props.read_from(args.config) + + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") + else: + logging.basicConfig(level=logging.DEBUG) + + logging.getLogger("legendmeta").setLevel(logging.INFO) + logging.getLogger("numba").setLevel(logging.INFO) + logging.getLogger("parse").setLevel(logging.INFO) + logging.getLogger("lgdo").setLevel(logging.INFO) + logging.getLogger("h5py._conv").setLevel(logging.INFO) + + log = logging.getLogger(__name__) + + fdb = FileDB(config, scan=False) + fdb.scan_files([args.scan_path]) + fdb.scan_tables_columns(dir_files_conform=True) + + # augment dataframe with earliest timestamp found in file + + default = np.finfo("float64").max + timestamps = np.zeros(len(fdb.df), dtype="float64") + + for i, row in enumerate(fdb.df.itertuples()): + store = lh5.LH5Store( + base_path=f"{fdb.data_dir}/{fdb.tier_dirs['raw']}", keep_open=True + ) + + # list of first timestamps for each channel + loc_timestamps = np.full( + len(row.raw_tables), fill_value=default, dtype="float64" + ) + + msg = f"finding first timestamp in {fdb.data_dir}/{fdb.tier_dirs['raw']}/{row.raw_file}" + log.info(msg) + + found = False + for j, table in enumerate(row.raw_tables): + try: + loc_timestamps[j] = store.read( + fdb.table_format["raw"].format(ch=table) + "/timestamp", + row.raw_file.strip("/"), + n_rows=1, + )[0][0] + found = True + except KeyError: + pass + + if found and args.assume_nonsparse: + break + + if (loc_timestamps == default).all() or not found: + msg = "something went wrong! no valid first timestamp found" + raise RuntimeError(msg) + + timestamps[i] = np.min(loc_timestamps) + + msg = f"found {timestamps[i]}" + log.info(msg) + + if timestamps[i] < 0 or timestamps[i] > 4102444800: + msg = "something went wrong! 
timestamp does not make sense" + raise RuntimeError(msg) + + fdb.df["first_timestamp"] = timestamps + + fdb.to_disk(args.output, wo_mode="of") diff --git a/workflow/src/legenddataflow/scripts/merge_channels.py b/workflow/src/legenddataflow/scripts/merge_channels.py index d6fec7a..6fee6f5 100644 --- a/workflow/src/legenddataflow/scripts/merge_channels.py +++ b/workflow/src/legenddataflow/scripts/merge_channels.py @@ -24,138 +24,142 @@ def replace_path(d, old_path, new_path): return d -argparser = argparse.ArgumentParser() -argparser.add_argument("--input", help="input file", nargs="*", type=str, required=True) -argparser.add_argument("--output", help="output file", type=str, required=True) -argparser.add_argument( - "--in_db", - help="in db file (used for when lh5 files referred to in db)", - type=str, - required=False, -) -argparser.add_argument( - "--out_db", - help="lh5 file (used for when lh5 files referred to in db)", - type=str, - required=False, -) -argparser.add_argument( - "--channelmap", - help="channelmap", - type=str, - required=False, - default=None, -) -argparser.add_argument( - "--timestamp", - help="timestamp", - type=str, - required=False, -) -args = argparser.parse_args() - -# change to only have 1 output file for multiple inputs -# don't care about processing step, check if extension matches - -channel_files = args.input.infiles if hasattr(args.input, "infiles") else args.input - -file_extension = Path(args.output).suffix - -if args.channelmap is not None: - channel_map = LegendMetadata(args.channelmap, lazy=True) - chmap = channel_map.channelmap(args.timestamp) -else: - chmap = None - -if file_extension == ".dat" or file_extension == ".dir": - out_file = Path(args.output).with_suffix("") -else: - out_file = args.output - -rng = np.random.default_rng() -temp_output = f"{out_file}.{rng.integers(0, 99999):05d}" - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -if file_extension in (".json", ".yaml", ".yml"): - out_dict = {} - for channel in channel_files: - if Path(channel).suffix == file_extension: - channel_dict = Props.read_from(channel) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - if chmap is not None: - channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" +def merge_channels() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument( + "--input", help="input file", nargs="*", type=str, required=True + ) + argparser.add_argument("--output", help="output file", type=str, required=True) + argparser.add_argument( + "--in_db", + help="in db file (used for when lh5 files referred to in db)", + type=str, + required=False, + ) + argparser.add_argument( + "--out_db", + help="lh5 file (used for when lh5 files referred to in db)", + type=str, + required=False, + ) + argparser.add_argument( + "--channelmap", + help="channelmap", + type=str, + required=False, + default=None, + ) + argparser.add_argument( + "--timestamp", + help="timestamp", + type=str, + required=False, + ) + args = argparser.parse_args() + + # change to only have 1 output file for multiple inputs + # don't care about processing step, check if extension matches + + channel_files = args.input.infiles if hasattr(args.input, "infiles") else args.input + + file_extension = Path(args.output).suffix + + if args.channelmap is not None: + channel_map = LegendMetadata(args.channelmap, lazy=True) + chmap = channel_map.channelmap(args.timestamp) + else: + chmap = None + + if file_extension == ".dat" or file_extension == ".dir": + out_file = 
Path(args.output).with_suffix("") + else: + out_file = args.output + + rng = np.random.default_rng() + temp_output = f"{out_file}.{rng.integers(0, 99999):05d}" + + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + + if file_extension in (".json", ".yaml", ".yml"): + out_dict = {} + for channel in channel_files: + if Path(channel).suffix == file_extension: + channel_dict = Props.read_from(channel) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + channel_name = fkey.channel + out_dict[channel_name] = channel_dict else: - channel_name = fkey.channel - out_dict[channel_name] = channel_dict - else: - msg = "Output file extension does not match input file extension" - raise RuntimeError(msg) - - Props.write_to(out_file, out_dict) - -elif file_extension == ".pkl": - out_dict = {} - for channel in channel_files: - with Path(channel).open("rb") as r: - channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - if chmap is not None: - channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" - else: - channel_name = fkey.channel - out_dict[channel_name] = channel_dict - - with Path(temp_output).open("wb") as w: - pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - - Path(temp_output).rename(out_file) - -elif file_extension == ".dat" or file_extension == ".dir": - common_dict = {} - with shelve.open(str(out_file), "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: + msg = "Output file extension does not match input file extension" + raise RuntimeError(msg) + + Props.write_to(out_file, out_dict) + + elif file_extension == ".pkl": + out_dict = {} for channel in channel_files: with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel_files[0]).name) - if chmap is not None: - channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" - else: - channel_name = fkey.channel - if isinstance(channel_dict, dict) and "common" in list(channel_dict): - chan_common_dict = channel_dict.pop("common") - common_dict[channel_name] = chan_common_dict - shelf[channel_name] = channel_dict - if len(common_dict) > 0: - shelf["common"] = common_dict - - -elif file_extension == ".lh5": - if args.in_db: - db_dict = Props.read_from(args.in_db) - for channel in channel_files: - if Path(channel).suffix == file_extension: fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) if chmap is not None: channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" else: channel_name = fkey.channel - tb_in = lh5.read(f"{channel_name}", channel) - - lh5.write( - tb_in, - name=channel_name, - lh5_file=temp_output, - wo_mode="a", - ) - if args.in_db: - db_dict[channel_name] = replace_path( - db_dict[channel_name], channel, args.output + out_dict[channel_name] = channel_dict + + with Path(temp_output).open("wb") as w: + pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) + + Path(temp_output).rename(out_file) + + elif file_extension == ".dat" or file_extension == ".dir": + common_dict = {} + with shelve.open(str(out_file), "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: + for channel in channel_files: + with Path(channel).open("rb") as r: + channel_dict = pkl.load(r) + fkey = ChannelProcKey.get_filekey_from_pattern( + Path(channel_files[0]).name + ) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + channel_name = fkey.channel + if isinstance(channel_dict, dict) and "common" in 
list(channel_dict): + chan_common_dict = channel_dict.pop("common") + common_dict[channel_name] = chan_common_dict + shelf[channel_name] = channel_dict + if len(common_dict) > 0: + shelf["common"] = common_dict + + elif file_extension == ".lh5": + if args.in_db: + db_dict = Props.read_from(args.in_db) + for channel in channel_files: + if Path(channel).suffix == file_extension: + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + channel_name = fkey.channel + tb_in = lh5.read(f"{channel_name}", channel) + + lh5.write( + tb_in, + name=channel_name, + lh5_file=temp_output, + wo_mode="a", ) - else: - msg = "Output file extension does not match input file extension" - raise RuntimeError(msg) - if args.out_db: - Props.write_to(args.out_db, db_dict) + if args.in_db: + db_dict[channel_name] = replace_path( + db_dict[channel_name], channel, args.output + ) + else: + msg = "Output file extension does not match input file extension" + raise RuntimeError(msg) + if args.out_db: + Props.write_to(args.out_db, db_dict) - Path(temp_output).rename(out_file) + Path(temp_output).rename(out_file) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py new file mode 100644 index 0000000..1bea45d --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py @@ -0,0 +1,150 @@ +import argparse +import logging +import pickle as pkl +import time +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from lgdo import Array, Table +from pygama.pargen.dplms_ge_dict import dplms_ge_dict + +from ....log import build_log + + +def par_geds_dsp_dplms() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) + argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) + argparser.add_argument("--inplots", help="in_plot_path", type=str) + argparser.add_argument("--database", help="database", type=str, required=True) + + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) + argparser.add_argument("--lh5_path", help="lh5_path", type=str, required=True) + argparser.add_argument("--plot_path", help="plot_path", type=str) + + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_dplms"] + + log = build_log(config_dict, args.log) + + log = logging.getLogger(__name__) + sto = lh5.LH5Store() + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + configs = LegendMetadata(args.configs, lazy=True).on( + args.timestamp, system=args.datatype + ) + dsp_config = config_dict["inputs"]["proc_chain"][args.channel] 
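The parameter database read just below (db_dict) carries per-channel values that are substituted into the DSP processing chain, and the DPLMS routine both consumes and extends it. A minimal sketch of how such a dict is fed to a one-off DSP run, under the assumption that the chain looks these values up as db.<block>.<field>; file names and the etrap block are placeholders, not values from this patch:

import lgdo.lh5 as lh5
from dbetto.catalog import Props
from pygama.pargen.dsp_optimize import run_one_dsp

# assumed inputs: a raw waveform table and a processing-chain config (placeholder paths)
tb_data = lh5.read("ch1234567/raw", "example-raw.lh5")
dsp_config = Props.read_from("example-dsp-proc-chain.yaml")

# per-channel overrides picked up by the chain's db.etrap.* placeholders
db_dict = {"etrap": {"rise": "10*us", "flat": "3*us"}}

dsp_out = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0)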
+ + dplms_json = config_dict["inputs"]["dplms_pars"][args.channel] + dplms_dict = Props.read_from(dplms_json) + + db_dict = Props.read_from(args.database) + + if dplms_dict["run_dplms"] is True: + with Path(args.fft_raw_filelist).open() as f: + fft_files = sorted(f.read().splitlines()) + + t0 = time.time() + log.info("\nLoad fft data") + energies = sto.read(f"{channel}/raw/daqenergy", fft_files)[0] + idxs = np.where(energies.nda == 0)[0] + raw_fft = sto.read( + f"{channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs + )[0] + t1 = time.time() + log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") + + log.info("\nRunning event selection") + peaks_kev = np.array(dplms_dict["peaks_kev"]) + # kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] + + peaks_rounded = [int(peak) for peak in peaks_kev] + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0][ + "peak" + ].nda + ids = np.isin(peaks, peaks_rounded) + peaks = peaks[ids] + # idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] + + raw_cal = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] + log.info( + f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}" + ) + + if isinstance(dsp_config, (str, list)): + dsp_config = Props.read_from(dsp_config) + + if args.plot_path: + out_dict, plot_dict = dplms_ge_dict( + raw_fft, + raw_cal, + dsp_config, + db_dict, + dplms_dict, + display=1, + ) + if args.inplots: + with Path(args.inplots).open("rb") as r: + inplot_dict = pkl.load(r) + inplot_dict.update({"dplms": plot_dict}) + + else: + out_dict = dplms_ge_dict( + raw_fft, + raw_cal, + dsp_config, + db_dict, + dplms_dict, + ) + + coeffs = out_dict["dplms"].pop("coefficients") + dplms_pars = Table(col_dict={"coefficients": Array(coeffs)}) + out_dict["dplms"]["coefficients"] = ( + f"loadlh5('{args.lh5_path}', '{channel}/dplms/coefficients')" + ) + + log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") + else: + out_dict = {} + dplms_pars = Table(col_dict={"coefficients": Array([])}) + if args.inplots: + with Path(args.inplots).open("rb") as r: + inplot_dict = pkl.load(r) + else: + inplot_dict = {} + + db_dict.update(out_dict) + + Path(args.lh5_path).parent.mkdir(parents=True, exist_ok=True) + sto.write( + Table(col_dict={"dplms": dplms_pars}), + name=channel, + lh5_file=args.lh5_path, + wo_mode="overwrite", + ) + + Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.dsp_pars, db_dict) + + if args.plot_path: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: + pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py new file mode 100644 index 0000000..4b755c2 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py @@ -0,0 +1,398 @@ +import argparse +import pickle as pkl +import time +import warnings +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +import pygama.pargen.energy_optimisation as om # noqa: F401 +import sklearn.gaussian_process.kernels as ker +from dbetto import TextDB +from dbetto.catalog import Props +from dspeed.units import unit_registry as ureg +from legendmeta import LegendMetadata +from pygama.math.distributions import hpge_peak +from pygama.pargen.dsp_optimize import ( + BayesianOptimizer, + run_bayesian_optimisation, + 
run_one_dsp, +) + +from ....log import build_log + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) +warnings.filterwarnings(action="ignore", category=np.RankWarning) + + +def par_geds_dsp_eopt() -> None: + argparser = argparse.ArgumentParser() + + argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) + argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) + argparser.add_argument("--inplots", help="in_plot_path", type=str) + + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument( + "--final_dsp_pars", help="final_dsp_pars", type=str, required=True + ) + argparser.add_argument("--qbb_grid_path", help="qbb_grid_path", type=str) + argparser.add_argument("--plot_path", help="plot_path", type=str) + + argparser.add_argument( + "--plot_save_path", help="plot_save_path", type=str, required=False + ) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_eopt"] + + log = build_log(config_dict, args.log) + + sto = lh5.LH5Store() + t0 = time.time() + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + dsp_config = config_dict["inputs"]["processing_chain"][args.channel] + opt_json = config_dict["inputs"]["optimiser_config"][args.channel] + + opt_dict = Props.read_from(opt_json) + db_dict = Props.read_from(args.decay_const) + + if opt_dict.pop("run_eopt") is True: + peaks_kev = np.array(opt_dict["peaks"]) + kev_widths = [tuple(kev_width) for kev_width in opt_dict["kev_widths"]] + + kwarg_dicts_cusp = [] + kwarg_dicts_trap = [] + kwarg_dicts_zac = [] + for peak in peaks_kev: + peak_idx = np.where(peaks_kev == peak)[0][0] + kev_width = kev_widths[peak_idx] + + kwarg_dicts_cusp.append( + { + "parameter": "cuspEmax", + "func": hpge_peak, + "peak": peak, + "kev_width": kev_width, + "bin_width": 5, + } + ) + kwarg_dicts_zac.append( + { + "parameter": "zacEmax", + "func": hpge_peak, + "peak": peak, + "kev_width": kev_width, + "bin_width": 5, + } + ) + kwarg_dicts_trap.append( + { + "parameter": "trapEmax", + "func": hpge_peak, + "peak": peak, + "kev_width": kev_width, + "bin_width": 5, + } + ) + + peaks_rounded = [int(peak) for peak in peaks_kev] + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0][ + "peak" + ].nda + ids = np.isin(peaks, peaks_rounded) + peaks = peaks[ids] + idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] + + tb_data = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] + + t1 = time.time() + log.info(f"Data Loaded in {(t1-t0)/60} minutes") + + if isinstance(dsp_config, (str, list)): + dsp_config = Props.read_from(dsp_config) + + dsp_config["outputs"] = ["tp_99", "tp_0_est", "dt_eff"] + + init_data = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0) + full_dt = (init_data["tp_99"].nda - init_data["tp_0_est"].nda)[idx_list[-1]] + flat_val = np.ceil(1.1 * np.nanpercentile(full_dt, 99) / 
100) / 10 + + if flat_val < 1.0: + flat_val = 1.0 + elif flat_val > 4: + flat_val = 4 + flat_val = f"{flat_val}*us" + + db_dict["cusp"] = {"flat": flat_val} + db_dict["zac"] = {"flat": flat_val} + db_dict["etrap"] = {"flat": flat_val} + + tb_data.add_column("dt_eff", init_data["dt_eff"]) + + dsp_config["processors"].pop("dt_eff") + + dsp_config["outputs"] = ["zacEmax", "cuspEmax", "trapEmax", "dt_eff"] + + kwarg_dict = [ + { + "peak_dicts": kwarg_dicts_cusp, + "ctc_param": "dt_eff", + "idx_list": idx_list, + "peaks_kev": peaks_kev, + }, + { + "peak_dicts": kwarg_dicts_zac, + "ctc_param": "dt_eff", + "idx_list": idx_list, + "peaks_kev": peaks_kev, + }, + { + "peak_dicts": kwarg_dicts_trap, + "ctc_param": "dt_eff", + "idx_list": idx_list, + "peaks_kev": peaks_kev, + }, + ] + + fom = eval(opt_dict["fom"]) + out_field = opt_dict["fom_field"] + out_err_field = opt_dict["fom_err_field"] + sample_x = np.array(opt_dict["initial_samples"]) + + results_cusp = [] + results_zac = [] + results_trap = [] + + sample_y_cusp = [] + sample_y_zac = [] + sample_y_trap = [] + + err_y_cusp = [] + err_y_zac = [] + err_y_trap = [] + + for i, x in enumerate(sample_x): + db_dict["cusp"]["sigma"] = f"{x[0]}*us" + db_dict["zac"]["sigma"] = f"{x[0]}*us" + db_dict["etrap"]["rise"] = f"{x[0]}*us" + + log.info(f"Initialising values {i+1} : {db_dict}") + + tb_out = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0) + + res = fom(tb_out, kwarg_dict[0]) + results_cusp.append(res) + sample_y_cusp.append(res[out_field]) + err_y_cusp.append(res[out_err_field]) + + res = fom(tb_out, kwarg_dict[1]) + results_zac.append(res) + sample_y_zac.append(res[out_field]) + err_y_zac.append(res[out_err_field]) + + res = fom(tb_out, kwarg_dict[2]) + results_trap.append(res) + sample_y_trap.append(res[out_field]) + err_y_trap.append(res[out_err_field]) + + log.info(f"{i+1} Finished") + + if np.isnan(sample_y_cusp).all(): + max_cusp = opt_dict["nan_default"] + else: + max_cusp = np.ceil(np.nanmax(sample_y_cusp) * 2) + if np.isnan(sample_y_zac).all(): + max_zac = opt_dict["nan_default"] + else: + max_zac = np.ceil(np.nanmax(sample_y_zac) * 2) + if np.isnan(sample_y_trap).all(): + max_trap = opt_dict["nan_default"] + else: + max_trap = np.ceil(np.nanmax(sample_y_trap) * 2) + + nan_vals = [max_cusp, max_zac, max_trap] + + for i in range(len(sample_x)): + if np.isnan(sample_y_cusp[i]): + results_cusp[i]["y_val"] = max_cusp + sample_y_cusp[i] = max_cusp + + if np.isnan(sample_y_zac[i]): + results_zac[i]["y_val"] = max_zac + sample_y_zac[i] = max_zac + + if np.isnan(sample_y_trap[i]): + results_trap[i]["y_val"] = max_trap + sample_y_trap[i] = max_trap + + kernel = ( + ker.ConstantKernel(2.0, constant_value_bounds="fixed") + + 1.0 * ker.RBF(1.0, length_scale_bounds=[0.5, 2.5]) + + ker.WhiteKernel(noise_level=0.1, noise_level_bounds=(1e-5, 1e1)) + ) + + lambda_param = 5 + sampling_rate = tb_data["waveform_presummed"]["dt"][0] + sampling_unit = ureg.Quantity( + tb_data["waveform_presummed"]["dt"].attrs["units"] + ) + waveform_sampling = sampling_rate * sampling_unit + + bopt_cusp = BayesianOptimizer( + acq_func=opt_dict["acq_func"], + batch_size=opt_dict["batch_size"], + kernel=kernel, + sampling_rate=waveform_sampling, + fom_value=out_field, + fom_error=out_err_field, + ) + bopt_cusp.lambda_param = lambda_param + bopt_cusp.add_dimension("cusp", "sigma", 0.5, 16, True, "us") + + bopt_zac = BayesianOptimizer( + acq_func=opt_dict["acq_func"], + batch_size=opt_dict["batch_size"], + kernel=kernel, + sampling_rate=waveform_sampling, + 
fom_value=out_field, + fom_error=out_err_field, + ) + bopt_zac.lambda_param = lambda_param + bopt_zac.add_dimension("zac", "sigma", 0.5, 16, True, "us") + + bopt_trap = BayesianOptimizer( + acq_func=opt_dict["acq_func"], + batch_size=opt_dict["batch_size"], + kernel=kernel, + sampling_rate=waveform_sampling, + fom_value=out_field, + fom_error=out_err_field, + ) + bopt_trap.lambda_param = lambda_param + bopt_trap.add_dimension("etrap", "rise", 1, 12, True, "us") + + bopt_cusp.add_initial_values( + x_init=sample_x, y_init=sample_y_cusp, yerr_init=err_y_cusp + ) + bopt_zac.add_initial_values( + x_init=sample_x, y_init=sample_y_zac, yerr_init=err_y_zac + ) + bopt_trap.add_initial_values( + x_init=sample_x, y_init=sample_y_trap, yerr_init=err_y_trap + ) + + best_idx = np.nanargmin(sample_y_cusp) + bopt_cusp.optimal_results = results_cusp[best_idx] + bopt_cusp.optimal_x = sample_x[best_idx] + + best_idx = np.nanargmin(sample_y_zac) + bopt_zac.optimal_results = results_zac[best_idx] + bopt_zac.optimal_x = sample_x[best_idx] + + best_idx = np.nanargmin(sample_y_trap) + bopt_trap.optimal_results = results_trap[best_idx] + bopt_trap.optimal_x = sample_x[best_idx] + + optimisers = [bopt_cusp, bopt_zac, bopt_trap] + + out_param_dict, out_results_list = run_bayesian_optimisation( + tb_data, + dsp_config, + [fom], + optimisers, + fom_kwargs=kwarg_dict, + db_dict=db_dict, + nan_val=nan_vals, + n_iter=opt_dict["n_iter"], + ) + + Props.add_to(db_dict, out_param_dict) + + # db_dict.update(out_param_dict) + + t2 = time.time() + log.info(f"Optimiser finished in {(t2-t1)/60} minutes") + + out_alpha_dict = {} + out_alpha_dict["cuspEmax_ctc"] = { + "expression": "cuspEmax*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, + } + + out_alpha_dict["cuspEftp_ctc"] = { + "expression": "cuspEftp*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, + } + + out_alpha_dict["zacEmax_ctc"] = { + "expression": "zacEmax*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, + } + + out_alpha_dict["zacEftp_ctc"] = { + "expression": "zacEftp*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, + } + + out_alpha_dict["trapEmax_ctc"] = { + "expression": "trapEmax*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, + } + + out_alpha_dict["trapEftp_ctc"] = { + "expression": "trapEftp*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, + } + if "ctc_params" in db_dict: + db_dict["ctc_params"].update(out_alpha_dict) + else: + db_dict.update({"ctc_params": out_alpha_dict}) + + Path(args.qbb_grid_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.qbb_grid_path).open("wb") as f: + pkl.dump(optimisers, f) + + else: + Path(args.qbb_grid_path).touch() + + Path(args.final_dsp_pars).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.final_dsp_pars, db_dict) + + if args.plot_path: + if args.inplots: + with Path(args.inplots).open("rb") as r: + plot_dict = pkl.load(r) + else: + plot_dict = {} + + plot_dict["trap_optimisation"] = { + "kernel_space": bopt_trap.plot(init_samples=sample_x), + "acq_space": bopt_trap.plot_acq(init_samples=sample_x), + } + + plot_dict["cusp_optimisation"] = { + "kernel_space": bopt_cusp.plot(init_samples=sample_x), + "acq_space": bopt_cusp.plot_acq(init_samples=sample_x), + } + + plot_dict["zac_optimisation"] = { + "kernel_space": 
bopt_zac.plot(init_samples=sample_x), + "acq_space": bopt_zac.plot_acq(init_samples=sample_x), + } + + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as w: + pkl.dump(plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py similarity index 98% rename from workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py rename to workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py index 2c01421..e9b1de6 100644 --- a/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py @@ -16,7 +16,7 @@ from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp -from ..log import build_log +from ....log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -80,7 +80,7 @@ def get_out_data( return out_tbl, len(np.where(final_mask)[0]) -if __name__ == "__main__": +def par_geds_dsp_evtsel() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) argparser.add_argument( @@ -168,10 +168,6 @@ def get_out_data( if lh5_path[-1] != "/": lh5_path += "/" - raw_fields = [ - field.replace(lh5_path, "") for field in lh5.ls(raw_files[0], lh5_path) - ] - tb = sto.read( lh5_path, raw_files, field_mask=["daqenergy", "t_sat_lo", "timestamp"] )[0] diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py new file mode 100644 index 0000000..691a0e8 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py @@ -0,0 +1,112 @@ +import argparse +import pickle as pkl +import time +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +import pygama.pargen.noise_optimization as pno +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes +from pygama.pargen.dsp_optimize import run_one_dsp + +from ....log import build_log + + +def par_geds_dsp_nopt() -> None: + sto = lh5.LH5Store() + + argparser = argparse.ArgumentParser() + argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) + argparser.add_argument("--database", help="database", type=str, required=True) + argparser.add_argument("--inplots", help="inplots", type=str) + + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) + argparser.add_argument("--plot_path", help="plot_path", type=str) + + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] + + log = build_log(config_dict, args.log) + + t0 = time.time() + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + 
channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + dsp_config = config_dict["inputs"]["processing_chain"][args.channel] + opt_json = config_dict["inputs"]["optimiser_config"][args.channel] + + opt_dict = Props.read_from(opt_json) + db_dict = Props.read_from(args.database) + + if opt_dict.pop("run_nopt") is True: + with Path(args.raw_filelist).open() as f: + files = f.read().splitlines() + + raw_files = sorted(files) + + energies = sto.read(f"{channel}/raw/daqenergy", raw_files)[0] + idxs = np.where(energies.nda == 0)[0] + tb_data = sto.read( + f"{channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs + )[0] + t1 = time.time() + log.info(f"Time to open raw files {t1-t0:.2f} s, n. baselines {len(tb_data)}") + + log.info(f"Select baselines {len(tb_data)}") + dsp_data = run_one_dsp(tb_data, dsp_config) + cut_dict = generate_cuts(dsp_data, cut_dict=opt_dict.pop("cut_pars")) + cut_idxs = get_cut_indexes(dsp_data, cut_dict) + tb_data = sto.read( + f"{channel}/raw", + raw_files, + n_rows=opt_dict.pop("n_events"), + idx=idxs[cut_idxs], + )[0] + log.info(f"... {len(tb_data)} baselines after cuts") + + if isinstance(dsp_config, (str, list)): + dsp_config = Props.read_from(dsp_config) + + if args.plot_path: + out_dict, plot_dict = pno.noise_optimization( + tb_data, dsp_config, db_dict.copy(), opt_dict, channel, display=1 + ) + else: + out_dict = pno.noise_optimization( + raw_files, dsp_config, db_dict.copy(), opt_dict, channel + ) + + t2 = time.time() + log.info(f"Optimiser finished in {(t2-t0)/60} minutes") + else: + out_dict = {} + plot_dict = {} + + if args.plot_path: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + if args.inplots: + with Path(args.inplots).open("rb") as r: + old_plot_dict = pkl.load(r) + plot_dict = dict(noise_optimisation=plot_dict, **old_plot_dict) + else: + plot_dict = {"noise_optimisation": plot_dict} + with Path(args.plot_path).open("wb") as f: + pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) + + Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.dsp_pars, dict(nopt_pars=out_dict, **db_dict)) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py new file mode 100644 index 0000000..d4a1e22 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py @@ -0,0 +1,26 @@ +import argparse +from pathlib import Path + +from dbetto.catalog import Props + + +def par_geds_dsp_svm() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument( + "--output_file", help="output par file", type=str, required=True + ) + argparser.add_argument( + "--input_file", help="input par file", type=str, required=True + ) + argparser.add_argument("--svm_file", help="svm file", required=True) + args = argparser.parse_args() + + par_data = Props.read_from(args.input_file) + + file = f"'$_/{Path(args.svm_file).name}'" + + par_data["svm"] = {"model_file": file} + + Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.output_file, par_data) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py new file mode 100644 index 0000000..162ccfa --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py @@ -0,0 +1,63 @@ +import argparse +import pickle as pkl +from pathlib import Path + +from dbetto import TextDB +from dbetto.catalog import 
Props +from lgdo import lh5 +from sklearn.svm import SVC + +from ....log import build_log + + +def par_geds_dsp_svm_build() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument("--configs", help="config file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + + argparser.add_argument( + "--output_file", help="output SVM file", type=str, required=True + ) + argparser.add_argument( + "--train_data", help="input data file", type=str, required=True + ) + argparser.add_argument( + "--train_hyperpars", help="input hyperparameter file", required=True + ) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] + + log = build_log(config_dict, args.log) + + # Load files + tb = lh5.read("ml_train/dsp", args.train_data) + log.debug("loaded data") + + hyperpars = Props.read_from(args.train_hyperpars) + + # Define training inputs + dwts_norm = tb["dwt_norm"].nda + labels = tb["dc_label"].nda + + log.debug("training model") + # Initialize and train SVM + svm = SVC( + random_state=int(hyperpars["random_state"]), + kernel=hyperpars["kernel"], + decision_function_shape=hyperpars["decision_function_shape"], + class_weight=hyperpars["class_weight"], + C=float(hyperpars["C"]), + gamma=float(hyperpars["gamma"]), + ) + + svm.fit(dwts_norm, labels) + log.debug("trained model") + + # Save trained model with pickle + with Path(args.output_file).open("wb") as svm_file: + pkl.dump(svm, svm_file, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py new file mode 100644 index 0000000..4d493a1 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py @@ -0,0 +1,146 @@ +import argparse +import pickle as pkl +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids +from pygama.pargen.dsp_optimize import run_one_dsp +from pygama.pargen.extract_tau import ExtractTau + +from ....log import build_log + + +def par_geds_dsp_tau() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--configs", help="configs path", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--plot_path", help="plot path", type=str, required=False) + argparser.add_argument("--output_file", help="output file", type=str, required=True) + + argparser.add_argument( + "--pulser_file", help="pulser file", type=str, required=False + ) + + argparser.add_argument("--raw_files", help="input files", nargs="*", type=str) + argparser.add_argument( + "--tcm_files", help="tcm_files", nargs="*", type=str, required=False + ) + args = argparser.parse_args() + + sto = lh5.LH5Store() + + configs = TextDB(args.configs, 
lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] + + log = build_log(config_dict, args.log) + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + channel_dict = config_dict["inputs"]["processing_chain"][args.channel] + kwarg_dict = config_dict["inputs"]["tau_config"][args.channel] + + kwarg_dict = Props.read_from(kwarg_dict) + + if kwarg_dict["run_tau"] is True: + dsp_config = Props.read_from(channel_dict) + kwarg_dict.pop("run_tau") + if ( + isinstance(args.raw_files, list) + and args.raw_files[0].split(".")[-1] == "filelist" + ): + input_file = args.raw_files[0] + with Path(input_file).open() as f: + input_file = f.read().splitlines() + else: + input_file = args.raw_files + + if args.pulser_file: + pulser_dict = Props.read_from(args.pulser_file) + mask = np.array(pulser_dict["mask"]) + + elif args.tcm_filelist: + # get pulser mask from tcm files + with Path(args.tcm_filelist).open() as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + + data = sto.read( + f"{channel}/raw", + input_file, + field_mask=["daqenergy", "timestamp", "t_sat_lo"], + )[0].view_as("pd") + threshold = kwarg_dict.pop("threshold") + + discharges = data["t_sat_lo"] > 0 + discharge_timestamps = np.where(data["timestamp"][discharges])[0] + is_recovering = np.full(len(data), False, dtype=bool) + for tstamp in discharge_timestamps: + is_recovering = is_recovering | np.where( + ( + ((data["timestamp"] - tstamp) < 0.01) + & ((data["timestamp"] - tstamp) > 0) + ), + True, + False, + ) + cuts = np.where( + (data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering) + )[0] + + tb_data = sto.read( + f"{channel}/raw", + input_file, + idx=cuts, + n_rows=kwarg_dict.pop("n_events"), + )[0] + + tb_out = run_one_dsp(tb_data, dsp_config) + log.debug("Processed Data") + cut_parameters = kwarg_dict.get("cut_parameters", None) + if cut_parameters is not None: + idxs = get_cut_indexes(tb_out, cut_parameters=cut_parameters) + log.debug("Applied cuts") + log.debug(f"{len(idxs)} events passed cuts") + else: + idxs = np.full(len(tb_out), True, dtype=bool) + + tau = ExtractTau(dsp_config, kwarg_dict["wf_field"]) + slopes = tb_out["tail_slope"].nda + log.debug("Calculating pz constant") + + tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) + + if args.plot_path: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + + plot_dict = tau.plot_waveforms_after_correction( + tb_data, "wf_pz", norm_param=kwarg_dict.get("norm_param", "pz_mean") + ) + plot_dict.update(tau.plot_slopes(slopes[idxs])) + + with Path(args.plot_path).open("wb") as f: + pkl.dump({"tau": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) + out_dict = tau.output_dict + else: + out_dict = {} + + Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.output_file, out_dict) diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py b/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py new file mode 100644 index 0000000..2b6c6e1 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py @@ -0,0 +1,262 @@ +from __future__ import annotations + +import argparse +import pickle as pkl 
+import warnings +from pathlib import Path + +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from pygama.pargen.AoE_cal import * # noqa: F403 +from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak +from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from pygama.pargen.utils import load_data + +from ....convert_np import convert_dict_np_to_float +from ....log import build_log + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + + +def get_results_dict(aoe_class): + return { + "cal_energy_param": aoe_class.cal_energy_param, + "dt_param": aoe_class.dt_param, + "rt_correction": aoe_class.dt_corr, + "1000-1300keV": aoe_class.timecorr_df.to_dict("index"), + "correction_fit_results": aoe_class.energy_corr_res_dict, + "low_cut": aoe_class.low_cut_val, + "high_cut": aoe_class.high_cut_val, + "low_side_sfs": aoe_class.low_side_sfs.to_dict("index"), + "2_side_sfs": aoe_class.two_side_sfs.to_dict("index"), + } + + +def fill_plot_dict(aoe_class, data, plot_options, plot_dict=None): + if plot_dict is not None: + for key, item in plot_options.items(): + if item["options"] is not None: + plot_dict[key] = item["function"](aoe_class, data, **item["options"]) + else: + plot_dict[key] = item["function"](aoe_class, data) + else: + plot_dict = {} + return plot_dict + + +def par_geds_hit_aoe() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("files", help="files", nargs="*", type=str) + argparser.add_argument( + "--pulser_file", help="pulser_file", type=str, required=False + ) + argparser.add_argument( + "--tcm_filelist", help="tcm_filelist", type=str, required=False + ) + + argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) + argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) + argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) + + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) + argparser.add_argument("--hit_pars", help="hit_pars", type=str) + argparser.add_argument("--aoe_results", help="aoe_results", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_hit_aoecal"] + + log = build_log(config_dict, args.log) + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + channel_dict = config_dict["inputs"]["aoecal_config"][args.channel] + kwarg_dict = Props.read_from(channel_dict) + + ecal_dict = Props.read_from(args.ecal_file) + cal_dict = ecal_dict["pars"] + eres_dict = ecal_dict["results"]["ecal"] + + with Path(args.eres_file).open("rb") as o: + object_dict = pkl.load(o) + + if kwarg_dict["run_aoe"] is True: + kwarg_dict.pop("run_aoe") + + pdf = 
eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else aoe_peak
+
+        sigma_func = (
+            eval(kwarg_dict.pop("sigma_func"))
+            if "sigma_func" in kwarg_dict
+            else SigmaFit
+        )
+
+        mean_func = (
+            eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else Pol1
+        )
+
+        if "plot_options" in kwarg_dict:
+            for field, item in kwarg_dict["plot_options"].items():
+                kwarg_dict["plot_options"][field]["function"] = eval(item["function"])
+
+        with Path(args.files[0]).open() as f:
+            files = f.read().splitlines()
+        files = sorted(files)
+
+        try:
+            eres = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy()
+
+            def eres_func(x):
+                return eval(eres["expression"], dict(x=x, **eres["parameters"]))
+
+        except KeyError:
+
+            def eres_func(x):
+                return x * np.nan
+
+        params = [
+            kwarg_dict["current_param"],
+            "tp_0_est",
+            "tp_99",
+            kwarg_dict["energy_param"],
+            kwarg_dict["cal_energy_param"],
+            kwarg_dict["cut_field"],
+            "timestamp",
+        ]
+
+        if "dt_param" in kwarg_dict:
+            params.append(kwarg_dict["dt_param"])
+        else:
+            params.append("dt_eff")
+
+        if "dt_cut" in kwarg_dict and kwarg_dict["dt_cut"] is not None:
+            cal_dict.update(kwarg_dict["dt_cut"]["cut"])
+            params.append(kwarg_dict["dt_cut"]["out_param"])
+
+        # load data in
+        data, threshold_mask = load_data(
+            files,
+            f"{channel}/dsp",
+            cal_dict,
+            params=params,
+            threshold=kwarg_dict.pop("threshold"),
+            return_selection_mask=True,
+        )
+
+        if args.pulser_file:
+            pulser_dict = Props.read_from(args.pulser_file)
+            mask = np.array(pulser_dict["mask"])
+            if "pulser_multiplicity_threshold" in kwarg_dict:
+                kwarg_dict.pop("pulser_multiplicity_threshold")
+
+        elif args.tcm_filelist:
+            # get pulser mask from tcm files
+            with Path(args.tcm_filelist).open() as f:
+                tcm_files = f.read().splitlines()
+            tcm_files = sorted(np.unique(tcm_files))
+            ids, mask = get_tcm_pulser_ids(
+                tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold")
+            )
+        else:
+            msg = "No pulser file or tcm filelist provided"
+            raise ValueError(msg)
+
+        data["is_pulser"] = mask[threshold_mask]
+
+        data["AoE_Uncorr"] = (
+            data[kwarg_dict["current_param"]] / data[kwarg_dict["energy_param"]]
+        )
+        aoe = CalAoE(
+            cal_dicts=cal_dict,
+            cal_energy_param=kwarg_dict["cal_energy_param"],
+            eres_func=eres_func,
+            pdf=pdf,
+            mean_func=mean_func,
+            sigma_func=sigma_func,
+            selection_string=f"{kwarg_dict.pop('cut_field')}&(~is_pulser)",
+            dt_corr=kwarg_dict.get("dt_corr", False),
+            dep_correct=kwarg_dict.get("dep_correct", False),
+            dt_cut=kwarg_dict.get("dt_cut", None),
+            dt_param=kwarg_dict.get("dt_param", 3),
+            high_cut_val=kwarg_dict.get("high_cut_val", 3),
+            compt_bands_width=kwarg_dict.get("compt_bands_width", 20),
+            debug_mode=args.debug | kwarg_dict.get("debug_mode", False),
+        )
+        aoe.update_cal_dicts(
+            {
+                "AoE_Uncorr": {
+                    "expression": f"{kwarg_dict['current_param']}/{kwarg_dict['energy_param']}",
+                    "parameters": {},
+                }
+            }
+        )
+        aoe.calibrate(data, "AoE_Uncorr")
+
+        log.info("Calibrated A/E")
+        out_dict = get_results_dict(aoe)
+        plot_dict = fill_plot_dict(aoe, data, kwarg_dict.get("plot_options", None))
+
+        aoe.pdf = aoe.pdf.name
+
+        # need to change eres func as can't pickle lambdas
+        try:
+            aoe.eres_func = eres_dict[kwarg_dict["cal_energy_param"]][
+                "eres_linear"
+            ].copy()
+        except KeyError:
+            aoe.eres_func = {}
+    else:
+        out_dict = {}
+        plot_dict = {}
+        aoe = None
+
+    if args.plot_file:
+        common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None
+        if args.inplots:
+            with Path(args.inplots).open("rb") as r:
+                out_plot_dict = pkl.load(r)
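+            # fold the freshly produced A/E plots into the plot dictionary carried over from earlier steps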
out_plot_dict.update({"aoe": plot_dict}) + else: + out_plot_dict = {"aoe": plot_dict} + + if "common" in list(out_plot_dict) and common_dict is not None: + out_plot_dict["common"].update(common_dict) + elif common_dict is not None: + out_plot_dict["common"] = common_dict + + Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_file).open("wb") as w: + pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) + + Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) + results_dict = dict(**ecal_dict["results"], aoe=out_dict) + final_hit_dict = { + "pars": {"operations": cal_dict}, + "results": results_dict, + } + + final_hit_dict = convert_dict_np_to_float(final_hit_dict) + + Props.write_to(args.hit_pars, final_hit_dict) + + Path(args.aoe_results).parent.mkdir(parents=True, exist_ok=True) + final_object_dict = dict( + **object_dict, + aoe=aoe, + ) + with Path(args.aoe_results).open("wb") as w: + pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_hit_ecal.py b/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py similarity index 99% rename from workflow/src/legenddataflow/scripts/pars_hit_ecal.py rename to workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py index 725fc84..c67e304 100644 --- a/workflow/src/legenddataflow/scripts/pars_hit_ecal.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py @@ -2,7 +2,6 @@ import argparse import copy -import logging import pickle as pkl import warnings from datetime import datetime @@ -24,10 +23,9 @@ from pygama.pargen.utils import load_data from scipy.stats import binned_statistic -from ..convert_np import convert_dict_np_to_float -from ..log import build_log +from ....convert_np import convert_dict_np_to_float +from ....log import build_log -log = logging.getLogger(__name__) mpl.use("agg") sto = lh5.LH5Store() @@ -437,7 +435,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): } -if __name__ == "__main__": +def par_geds_hit_ecal() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="filelist", nargs="*", type=str) argparser.add_argument( @@ -478,7 +476,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): msg = "invalid tier" raise ValueError(msg) - log = build_log(config_dict, args.log) + build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py b/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py new file mode 100644 index 0000000..357fe33 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py @@ -0,0 +1,230 @@ +from __future__ import annotations + +import argparse +import pickle as pkl +import warnings +from pathlib import Path + +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from pygama.math.distributions import gaussian +from pygama.pargen.AoE_cal import * # noqa: F403 +from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from pygama.pargen.lq_cal import * # noqa: F403 +from pygama.pargen.lq_cal import LQCal +from pygama.pargen.utils import load_data + +from ....convert_np import convert_dict_np_to_float +from ....log import build_log + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + + +def get_results_dict(lq_class): + return { + "cal_energy_param": lq_class.cal_energy_param, + 
"DEP_means": lq_class.timecorr_df.to_dict("index"), + "rt_correction": lq_class.dt_fit_pars, + "cut_fit_pars": lq_class.cut_fit_pars.to_dict(), + "cut_value": lq_class.cut_val, + "sfs": lq_class.low_side_sf.to_dict("index"), + } + + +def fill_plot_dict(lq_class, data, plot_options, plot_dict=None): + if plot_dict is not None: + for key, item in plot_options.items(): + if item["options"] is not None: + plot_dict[key] = item["function"](lq_class, data, **item["options"]) + else: + plot_dict[key] = item["function"](lq_class, data) + else: + plot_dict = {} + return plot_dict + + +def par_geds_hit_lq() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("files", help="files", nargs="*", type=str) + argparser.add_argument( + "--pulser_file", help="pulser_file", type=str, required=False + ) + argparser.add_argument( + "--tcm_filelist", help="tcm_filelist", type=str, required=False + ) + + argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) + argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) + argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) + + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) + argparser.add_argument("--hit_pars", help="hit_pars", type=str) + argparser.add_argument("--lq_results", help="lq_results", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_hit_lqcal"] + + log = build_log(config_dict, args.log) + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] + kwarg_dict = Props.read_from(channel_dict) + + ecal_dict = Props.read_from(args.ecal_file) + cal_dict = ecal_dict["pars"]["operations"] + eres_dict = ecal_dict["results"]["ecal"] + + with Path(args.eres_file).open("rb") as o: + object_dict = pkl.load(o) + + if kwarg_dict["run_lq"] is True: + kwarg_dict.pop("run_lq") + + cdf = eval(kwarg_dict.pop("cdf")) if "cdf" in kwarg_dict else gaussian + + if "plot_options" in kwarg_dict: + for field, item in kwarg_dict["plot_options"].items(): + kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) + + with Path(args.files[0]).open() as f: + files = f.read().splitlines() + files = sorted(files) + + try: + eres = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy() + + def eres_func(x): + return eval(eres["expression"], dict(x=x, **eres["parameters"])) + + except KeyError: + + def eres_func(x): + return x * np.nan + + params = [ + "lq80", + "dt_eff", + kwarg_dict["energy_param"], + kwarg_dict["cal_energy_param"], + kwarg_dict["cut_field"], + ] + + # load data in + data, threshold_mask = load_data( + files, + f"{channel}/dsp", + cal_dict, + params=params, + 
threshold=kwarg_dict.pop("threshold"),
+            return_selection_mask=True,
+        )
+
+        if args.pulser_file:
+            pulser_dict = Props.read_from(args.pulser_file)
+            mask = np.array(pulser_dict["mask"])
+            if "pulser_multiplicity_threshold" in kwarg_dict:
+                kwarg_dict.pop("pulser_multiplicity_threshold")
+
+        elif args.tcm_filelist:
+            # get pulser mask from tcm files
+            with Path(args.tcm_filelist).open() as f:
+                tcm_files = f.read().splitlines()
+            tcm_files = sorted(np.unique(tcm_files))
+            ids, mask = get_tcm_pulser_ids(
+                tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold")
+            )
+        else:
+            msg = "No pulser file or tcm filelist provided"
+            raise ValueError(msg)
+
+        data["is_pulser"] = mask[threshold_mask]
+
+        lq = LQCal(
+            cal_dict,
+            kwarg_dict["cal_energy_param"],
+            kwarg_dict["dt_param"],
+            eres_func,
+            cdf,
+            selection_string=f"{kwarg_dict.pop('cut_field')}&(~is_pulser)",
+            debug_mode=args.debug | kwarg_dict.get("debug_mode", False),
+        )
+
+        data["LQ_Ecorr"] = np.divide(data["lq80"], data[kwarg_dict["energy_param"]])
+
+        lq.update_cal_dicts(
+            {
+                "LQ_Ecorr": {
+                    "expression": f"lq80/{kwarg_dict['energy_param']}",
+                    "parameters": {},
+                }
+            }
+        )
+
+        lq.calibrate(data, "LQ_Ecorr")
+        log.info("Calibrated LQ")
+
+        out_dict = get_results_dict(lq)
+        plot_dict = fill_plot_dict(lq, data, kwarg_dict.get("plot_options", None))
+
+        # need to change eres func as can't pickle lambdas
+        try:
+            lq.eres_func = eres_dict[kwarg_dict["cal_energy_param"]][
+                "eres_linear"
+            ].copy()
+        except KeyError:
+            lq.eres_func = {}
+    else:
+        out_dict = {}
+        plot_dict = {}
+        lq = None
+
+    if args.plot_file:
+        common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None
+        if args.inplots:
+            with Path(args.inplots).open("rb") as r:
+                out_plot_dict = pkl.load(r)
+            out_plot_dict.update({"lq": plot_dict})
+        else:
+            out_plot_dict = {"lq": plot_dict}
+
+        if "common" in list(out_plot_dict) and common_dict is not None:
+            out_plot_dict["common"].update(common_dict)
+        elif common_dict is not None:
+            out_plot_dict["common"] = common_dict
+
+        Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True)
+        with Path(args.plot_file).open("wb") as w:
+            pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL)
+
+    final_hit_dict = convert_dict_np_to_float(
+        {
+            "pars": {"operations": cal_dict},
+            "results": dict(**eres_dict, lq=out_dict),
+        }
+    )
+    Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True)
+    Props.write_to(args.hit_pars, final_hit_dict)
+
+    final_object_dict = dict(
+        **object_dict,
+        lq=lq,
+    )
+    Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True)
+    with Path(args.lq_results).open("wb") as w:
+        pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL)
diff --git a/workflow/src/legenddataflow/scripts/pars_hit_qc.py b/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py
similarity index 98%
rename from workflow/src/legenddataflow/scripts/pars_hit_qc.py
rename to workflow/src/legenddataflow/scripts/par/geds/hit/qc.py
index 5e6a378..acc1a32 100644
--- a/workflow/src/legenddataflow/scripts/pars_hit_qc.py
+++ b/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py
@@ -2,7 +2,6 @@
 import argparse
 import json
-import logging
 import pickle as pkl
 import re
 import warnings
@@ -20,15 +19,13 @@
 )
 from pygama.pargen.utils import load_data
 
-from ..convert_np import convert_dict_np_to_float
-from ..log import build_log
-
-log = logging.getLogger(__name__)
+from ....convert_np import convert_dict_np_to_float
+from ....log import build_log
 
 warnings.filterwarnings(action="ignore",
category=RuntimeWarning) -if __name__ == "__main__": +def par_geds_hit_qc() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py b/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py similarity index 84% rename from workflow/src/legenddataflow/scripts/pars_pht_aoecal.py rename to workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py index 8aad849..12c70f8 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py @@ -2,12 +2,10 @@ import argparse import copy -import logging import pickle as pkl import re import warnings from pathlib import Path -from typing import Callable import numpy as np import pandas as pd @@ -19,10 +17,9 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data -from ..FileKey import ChannelProcKey, ProcessingFileKey -from ..log import build_log +from ....FileKey import ChannelProcKey, ProcessingFileKey +from ....log import build_log -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -77,62 +74,6 @@ def fill_plot_dict(aoe_class, data, plot_options, plot_dict=None): return plot_dict -def aoe_calibration( - data: pd.Dataframe, - cal_dicts: dict, - current_param: str, - energy_param: str, - cal_energy_param: str, - eres_func: Callable, - pdf: Callable = aoe_peak, - selection_string: str = "", - dt_corr: bool = False, - dep_correct: bool = False, - dt_cut: dict | None = None, - high_cut_val: int = 3, - mean_func: Callable = Pol1, - sigma_func: Callable = SigmaFit, - # dep_acc: float = 0.9, - dt_param: str = "dt_eff", - comptBands_width: int = 20, - plot_options: dict | None = None, - debug_mode: bool = False, -): - data["AoE_Uncorr"] = data[current_param] / data[energy_param] - aoe = CalAoE( - cal_dicts=cal_dicts, - cal_energy_param=cal_energy_param, - eres_func=eres_func, - pdf=pdf, - selection_string=selection_string, - dt_corr=dt_corr, - dep_correct=dep_correct, - dt_cut=dt_cut, - dt_param=dt_param, - high_cut_val=high_cut_val, - mean_func=mean_func, - sigma_func=sigma_func, - compt_bands_width=comptBands_width, - debug_mode=debug_mode | args.debug, - ) - aoe.update_cal_dicts( - { - "AoE_Uncorr": { - "expression": f"{current_param}/{energy_param}", - "parameters": {}, - } - } - ) - aoe.calibrate(data, "AoE_Uncorr") - log.info("Calibrated A/E") - return ( - cal_dicts, - get_results_dict(aoe), - fill_plot_dict(aoe, data, plot_options), - aoe, - ) - - def run_aoe_calibration( data, cal_dicts, @@ -143,6 +84,7 @@ def run_aoe_calibration( configs, channel, datatype, + debug_mode=False, # gen_plots=True, ): configs = LegendMetadata(path=configs) @@ -211,28 +153,50 @@ def eres_func(x): def eres_func(x): return x * np.nan - cal_dicts, out_dict, aoe_plot_dict, aoe_obj = aoe_calibration( - data, - selection_string=f"{kwarg_dict.pop('final_cut_field')}&(~is_pulser)", + data["AoE_Uncorr"] = ( + data[kwarg_dict["current_param"]] / data[kwarg_dict["energy_param"]] + ) + aoe = CalAoE( cal_dicts=cal_dicts, + cal_energy_param=kwarg_dict["cal_energy_param"], eres_func=eres_func, pdf=pdf, mean_func=mean_func, sigma_func=sigma_func, - **kwarg_dict, + selection_string=f"{kwarg_dict.pop('cut_field')}&(~is_pulser)", + dt_corr=kwarg_dict.get("dt_corr", False), + dep_correct=kwarg_dict.get("dep_correct", 
False),
+            dt_cut=kwarg_dict.get("dt_cut", None),
+            dt_param=kwarg_dict.get("dt_param", 3),
+            high_cut_val=kwarg_dict.get("high_cut_val", 3),
+            compt_bands_width=kwarg_dict.get("compt_bands_width", 20),
+            debug_mode=debug_mode | kwarg_dict.get("debug_mode", False),
         )
-        aoe_obj.pdf = aoe_obj.pdf.name
+        aoe.update_cal_dicts(
+            {
+                "AoE_Uncorr": {
+                    "expression": f"{kwarg_dict['current_param']}/{kwarg_dict['energy_param']}",
+                    "parameters": {},
+                }
+            }
+        )
+        aoe.calibrate(data, "AoE_Uncorr")
+
+        out_dict = get_results_dict(aoe)
+        aoe_plot_dict = fill_plot_dict(aoe, data, kwarg_dict.get("plot_options", None))
+
+        aoe.pdf = aoe.pdf.name
         # need to change eres func as can't pickle lambdas
         try:
-            aoe_obj.eres_func = results_dicts[next(iter(results_dicts))][
-                "partition_ecal"
-            ][kwarg_dict["cal_energy_param"]]["eres_linear"]
+            aoe.eres_func = results_dicts[next(iter(results_dicts))]["partition_ecal"][
+                kwarg_dict["cal_energy_param"]
+            ]["eres_linear"]
         except KeyError:
-            aoe_obj.eres_func = {}
+            aoe.eres_func = {}
     else:
         out_dict = {tstamp: None for tstamp in cal_dicts}
         aoe_plot_dict = {}
-        aoe_obj = None
+        aoe = None
 
     out_result_dicts = {}
     for tstamp, result_dict in results_dicts.items():
@@ -240,7 +204,7 @@ def eres_func(x):
 
     out_object_dicts = {}
     for tstamp, object_dict in object_dicts.items():
-        out_object_dicts[tstamp] = dict(**object_dict, aoe=aoe_obj)
+        out_object_dicts[tstamp] = dict(**object_dict, aoe=aoe)
 
     common_dict = (
         aoe_plot_dict.pop("common") if "common" in list(aoe_plot_dict) else None
@@ -257,7 +221,7 @@ def eres_func(x):
     return cal_dicts, out_result_dicts, out_object_dicts, out_plot_dicts
 
 
-if __name__ == "__main__":
+def par_geds_pht_aoe() -> None:
     argparser = argparse.ArgumentParser()
     argparser.add_argument(
         "--input_files", help="files", type=str, nargs="*", required=True
@@ -298,7 +262,7 @@ def eres_func(x):
     configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype)
     config_dict = configs["snakemake_rules"]["pars_pht_aoecal"]
 
-    log = build_log(config_dict, args.log)
+    build_log(config_dict, args.log)
 
     meta = LegendMetadata(path=args.metadata)
     chmap = meta.channelmap(args.timestamp, system=args.datatype)
@@ -424,6 +388,7 @@ def eres_func(x):
         args.configs,
         args.channel,
         args.datatype,
+        debug_mode=args.debug,
         # gen_plots=bool(args.plot_file),
     )
 
@@ -436,7 +401,7 @@ def eres_func(x):
     for out in sorted(args.hit_pars):
         fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name)
         final_hit_dict = {
-            "pars": cal_dict[fk.timestamp],
+            "pars": cal_dicts[fk.timestamp],
            "results": results_dicts[fk.timestamp],
         }
         Path(out).parent.mkdir(parents=True, exist_ok=True)
@@ -446,4 +411,4 @@ def eres_func(x):
         fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name)
         Path(out).parent.mkdir(parents=True, exist_ok=True)
         with Path(out).open("wb") as w:
-            pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL)
+            pkl.dump(object_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL)
diff --git a/workflow/src/legenddataflow/scripts/pars_pht_partcal.py b/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py
similarity index 99%
rename from workflow/src/legenddataflow/scripts/pars_pht_partcal.py
rename to workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py
index bd2d93f..560a063 100644
--- a/workflow/src/legenddataflow/scripts/pars_pht_partcal.py
+++ b/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py
@@ -19,8 +19,8 @@
 from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration
 from pygama.pargen.utils import load_data
 
-from
..FileKey import ChannelProcKey, ProcessingFileKey -from ..log import build_log +from ....FileKey import ChannelProcKey, ProcessingFileKey +from ....log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_fast.py b/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py similarity index 95% rename from workflow/src/legenddataflow/scripts/pars_pht_fast.py rename to workflow/src/legenddataflow/scripts/par/geds/pht/fast.py index 6dda1b7..0faa42d 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_fast.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py @@ -11,14 +11,14 @@ from dbetto import TextDB from dbetto.catalog import Props from legendmeta import LegendMetadata -from pars_pht_aoecal import run_aoe_calibration -from pars_pht_lqcal import run_lq_calibration -from pars_pht_partcal import calibrate_partition from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data +from workflow.src.legenddataflow.scripts.par.geds.pht.aoe import run_aoe_calibration +from workflow.src.legenddataflow.scripts.par.geds.pht.lq import run_lq_calibration +from workflow.src.legenddataflow.scripts.par.geds.pht.partcal import calibrate_partition -from ..FileKey import ChannelProcKey, ProcessingFileKey -from ..log import build_log +from ....FileKey import ChannelProcKey, ProcessingFileKey +from ....log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -42,7 +42,7 @@ def run_splitter(files): return run_files -if __name__ == "__main__": +def par_geds_pht_fast() -> None: argparser = argparse.ArgumentParser() argparser.add_argument( "--input_files", help="files", type=str, nargs="*", required=True @@ -83,7 +83,7 @@ def run_splitter(files): configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"] - log = build_log(config_dict["pars_pht_partcal"], args.log) + build_log(config_dict["pars_pht_partcal"], args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_lqcal.py b/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py similarity index 100% rename from workflow/src/legenddataflow/scripts/pars_pht_lqcal.py rename to workflow/src/legenddataflow/scripts/par/geds/pht/lq.py diff --git a/workflow/src/legenddataflow/scripts/pars_pht_qc.py b/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py similarity index 98% rename from workflow/src/legenddataflow/scripts/pars_pht_qc.py rename to workflow/src/legenddataflow/scripts/par/geds/pht/qc.py index feee4e5..af6dc95 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_qc.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py @@ -2,7 +2,6 @@ import argparse import json -import logging import pickle as pkl import re import warnings @@ -20,10 +19,8 @@ ) from pygama.pargen.utils import load_data -from ..convert_np import convert_dict_np_to_float -from ..log import build_log - -log = logging.getLogger(__name__) +from ....convert_np import convert_dict_np_to_float +from ....log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py 
b/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py similarity index 97% rename from workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py rename to workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py index 71167df..38f5e20 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py @@ -2,7 +2,6 @@ import argparse import json -import logging import pickle as pkl import re import warnings @@ -19,10 +18,8 @@ get_keys, ) -from ..convert_np import convert_dict_np_to_float -from ..log import build_log - -log = logging.getLogger(__name__) +from ....convert_np import convert_dict_np_to_float +from ....log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/src/legenddataflow/scripts/par/geds/psp/average.py b/workflow/src/legenddataflow/scripts/par/geds/psp/average.py new file mode 100644 index 0000000..65508a2 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/psp/average.py @@ -0,0 +1,160 @@ +import argparse +import pickle as pkl +from datetime import datetime +from pathlib import Path + +import matplotlib as mpl +import matplotlib.dates as mdates +import matplotlib.pyplot as plt +import numpy as np +from dbetto.catalog import Props +from legendmeta import LegendMetadata + +from ....FileKey import ChannelProcKey + +mpl.use("Agg") + + +def par_geds_psp_average() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument( + "--input", help="input files", nargs="*", type=str, required=True + ) + argparser.add_argument( + "--output", help="output file", nargs="*", type=str, required=True + ) + argparser.add_argument( + "--in_plots", help="input plot files", nargs="*", type=str, required=False + ) + argparser.add_argument( + "--out_plots", help="output plot files", nargs="*", type=str, required=False + ) + argparser.add_argument( + "--in_obj", help="input object files", nargs="*", type=str, required=False + ) + argparser.add_argument( + "--out_obj", help="output object files", nargs="*", type=str, required=False + ) + + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + args = argparser.parse_args() + + configs = LegendMetadata(args.configs, lazy=True).on( + args.timestamp, system=args.datatype + ) + merge_config = Props.read_from( + configs["snakemake_rules"]["pars_psp"]["inputs"]["psp_config"][args.channel] + ) + + ave_fields = merge_config["average_fields"] + + # partitions could be different for different channels - do separately for each channel + in_dicts = {} + for file in args.input: + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp + in_dicts[tstamp] = Props.read_from(file) + + plot_dict = {} + for field in ave_fields: + keys = field.split(".") + vals = [] + for _tstamp, tstamp_dict in in_dicts.items(): + val = tstamp_dict.copy() + for key in keys: + val = val[key] + vals.append(val) + if "dsp" in tstamp_dict: + tmp_dict = tstamp_dict["dsp"] + else: + tmp_dict = {} + tstamp_dict["dsp"] = tmp_dict + for i, key in enumerate(keys): + if i == len(keys) - 1: + tmp_dict[key] = val + else: + if key in tmp_dict: + tmp_dict = tmp_dict[key] + else: + tmp_dict[key] = {} + 
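+                        # descend into the newly created level before handling the next key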
tmp_dict = tmp_dict[key] + if isinstance(vals[0], str): + if "*" in vals[0]: + unit = vals[0].split("*")[1] + rounding = ( + len(val.split("*")[0].split(".")[-1]) if "." in vals[0] else 16 + ) + vals = np.array([float(val.split("*")[0]) for val in vals]) + else: + unit = None + rounding = 16 + else: + vals = np.array(vals) + unit = None + rounding = 16 + + mean_val = np.nan if len(vals[~np.isnan(vals)]) == 0 else np.nanmedian(vals) + mean = f"{round(mean_val, rounding)}*{unit}" if unit is not None else mean_val + + for _tstamp, tstamp_dict in in_dicts.items(): + val = tstamp_dict + for i, key in enumerate(keys): + if i == len(keys) - 1: + val[key] = mean + else: + val = val[key] + + fig = plt.figure() + plt.scatter( + [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in in_dicts], vals + ) + plt.axhline(y=mean_val, color="r", linestyle="-") + plt.xlabel("time") + if unit is not None: + plt.ylabel(f"value {unit}") + else: + plt.ylabel("value") + plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y")) + plt.gcf().autofmt_xdate() + plt.title(field) + plot_dict[field] = fig + plt.close() + + for file in args.output: + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp + Props.write_to(file, in_dicts[tstamp]) + + if args.out_plots: + for file in args.out_plots: + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp + if args.in_plots: + for infile in args.in_plots: + if tstamp in infile: + with Path(infile).open("rb") as f: + old_plot_dict = pkl.load(f) + break + old_plot_dict.update({"psp": plot_dict}) + new_plot_dict = old_plot_dict + else: + new_plot_dict = {"psp": plot_dict} + with Path(file).open("wb") as f: + pkl.dump(new_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) + + if args.out_obj: + for file in args.out_obj: + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp + if args.in_obj: + for infile in args.in_obj: + if tstamp in infile: + with Path(infile).open("rb") as f: + old_obj_dict = pkl.load(f) + break + new_obj_dict = old_obj_dict + else: + new_obj_dict = {} + with Path(file).open("wb") as f: + pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/par/geds/raw/blinding_calibration.py b/workflow/src/legenddataflow/scripts/par/geds/raw/blinding_calibration.py new file mode 100644 index 0000000..8af27a2 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/blinding_calibration.py @@ -0,0 +1,119 @@ +""" +This script applies a simple calibration to the daqenergy for all channels, +it does this using a peak search, matching the peaks to the given ones +and deriving a simple scaling relation from adc to keV. 
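+The derived scale factor is written out as a `daqenergy_cal` expression to be applied when building the hit tier.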
+""" + +import argparse +import logging +import pickle as pkl +from pathlib import Path + +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +from dbetto.catalog import Props +from lgdo import lh5 +from pygama.pargen.energy_cal import HPGeCalibration + +mpl.use("agg") + + +def par_geds_raw_blindcal() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--files", help="files", nargs="*", type=str) + + argparser.add_argument("--blind_curve", help="blind_curve", type=str) + argparser.add_argument("--plot_file", help="out plot path", type=str) + + argparser.add_argument("--meta", help="meta", type=str) + argparser.add_argument("--configs", help="configs", type=str) + argparser.add_argument("--log", help="log", type=str) + + argparser.add_argument("--timestamp", help="timestamp", type=str) + argparser.add_argument("--datatype", help="datatype", type=str) + argparser.add_argument("--channel", help="channel", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") + args = argparser.parse_args() + + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") + logging.getLogger("numba").setLevel(logging.INFO) + logging.getLogger("parse").setLevel(logging.INFO) + logging.getLogger("lgdo").setLevel(logging.INFO) + logging.getLogger("matplotlib").setLevel(logging.INFO) + log = logging.getLogger(__name__) + + # load in channel map + # meta = LegendMetadata(args.meta, lazy=True) + + # chmap = meta.channelmap(args.timestamp) + # if chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["is_blinded"] is True: + + # peaks to search for + peaks_keV = np.array( + [238, 583.191, 727.330, 860.564, 1592.53, 1620.50, 2103.53, 2614.50] + ) + + E_uncal = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[0].view_as( + "np" + ) + E_uncal = E_uncal[E_uncal > 200] + guess_keV = 2620 / np.nanpercentile(E_uncal, 99) # usual simple guess + # Euc_min = peaks_keV[0] / guess_keV * 0.6 + # Euc_max = peaks_keV[-1] / guess_keV * 1.1 + # dEuc = 1 / guess_keV + + # daqenergy is an int so use integer binning (dx used to be bugged as output so switched to nbins) + + hpge_cal = HPGeCalibration( + "daqenergy", + peaks_keV, + guess_keV, + 0, + uncal_is_int=True, + debug_mode=args.debug, + ) + + # Run the rough peak search + detected_peaks_locs, detected_peaks_keV, roughpars = hpge_cal.hpge_find_E_peaks( + E_uncal + ) + + log.info(f"{len(detected_peaks_locs)} peaks found:") + log.info("\t Energy | Position ") + for i, (Li, Ei) in enumerate(zip(detected_peaks_locs, detected_peaks_keV)): + log.info(f"\t{i}".ljust(4) + str(Ei).ljust(9) + f"| {Li:g}".ljust(5)) # noqa: G003 + + # dictionary to pass to build hit + out_dict = { + "pars": { + "operations": { + "daqenergy_cal": { + "expression": "daqenergy*a", + "parameters": {"a": round(roughpars[0], 5)}, + } + } + } + } + + # plot to check thagt the calibration is correct with zoom on 2.6 peak + fig = plt.figure(figsize=(8, 10)) + ax = plt.subplot(211) + ax.hist(E_uncal * roughpars[0], bins=np.arange(0, 3000, 1), histtype="step") + ax.set_ylabel("counts") + ax.set_yscale("log") + ax2 = plt.subplot(212) + ax2.hist( + E_uncal * roughpars[0], + bins=np.arange(2600, 2630, 1 * roughpars[0]), + histtype="step", + ) + ax2.set_xlabel("energy (keV)") + ax2.set_ylabel("counts") + plt.suptitle(args.channel) + with Path(args.plot_file).open("wb") as w: + pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) + plt.close() + + Props.write_to_file(args.blind_curve, out_dict) diff --git 
a/workflow/src/legenddataflow/scripts/par/geds/raw/check_blinding.py b/workflow/src/legenddataflow/scripts/par/geds/raw/check_blinding.py new file mode 100644 index 0000000..4a8f53c --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/check_blinding.py @@ -0,0 +1,114 @@ +""" +This script checks that the blinding for a particular channel is still valid, +it does this by taking the calibration curve stored in the overrides, applying it +to the daqenergy, running a peak search over the calibrated energy and checking that +there are peaks within 5keV of the 583 and 2614 peaks. If the detector is in ac mode +then it will skip the check. +""" + +import argparse +import pickle as pkl +from pathlib import Path + +import matplotlib as mpl +import matplotlib.pyplot as plt +import numexpr as ne +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from lgdo import lh5 +from pygama.math.histogram import get_hist +from pygama.pargen.energy_cal import get_i_local_maxima + +from ....log import build_log + +mpl.use("Agg") + + +def par_geds_raw_blindcheck() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--files", help="files", nargs="*", type=str) + argparser.add_argument("--output", help="output file", type=str) + argparser.add_argument("--plot_file", help="plot file", type=str) + argparser.add_argument( + "--blind_curve", help="blinding curves file", nargs="*", type=str + ) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--configs", help="config file", type=str) + argparser.add_argument("--channel", help="channel", type=str) + argparser.add_argument("--metadata", help="channel", type=str) + argparser.add_argument("--log", help="log file", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["tier_raw_blindcheck"] + + log = build_log(config_dict, args.log) + + # get the usability status for this channel + chmap = ( + LegendMetadata(args.metadata, lazy=True) + .channelmap(args.timestamp) + .map("daq.rawid") + ) + det_status = chmap[int(args.channel[2:])]["analysis"]["is_blinded"] + + # read in calibration curve for this channel + blind_curve = Props.read_from(args.blind_curve)[args.channel]["pars"]["operations"] + + # load in the data + daqenergy = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[ + 0 + ].view_as("np") + + # calibrate daq energy using pre existing curve + daqenergy_cal = ne.evaluate( + blind_curve["daqenergy_cal"]["expression"], + local_dict=dict( + daqenergy=daqenergy, **blind_curve["daqenergy_cal"]["parameters"] + ), + ) + + # bin with 1 keV bins and get maxs + hist, bins, var = get_hist(daqenergy_cal, np.arange(0, 3000, 1)) + maxs = get_i_local_maxima(hist, delta=25) + log.info(f"peaks found at : {maxs}") + + # plot the energy spectrum to check calibration + fig = plt.figure(figsize=(8, 10)) + ax = plt.subplot(211) + ax.hist(daqenergy_cal, bins=np.arange(0, 3000, 1), histtype="step") + ax.set_ylabel("counts") + ax.set_yscale("log") + ax2 = plt.subplot(212) + ax2.hist( + daqenergy_cal, + bins=np.arange(2600, 2630, 1 * blind_curve["daqenergy_cal"]["parameters"]["a"]), + histtype="step", + ) + ax2.set_xlabel("energy (keV)") + ax2.set_ylabel("counts") + plt.suptitle(args.channel) + with 
Path(args.plot_file).open("wb") as w: + pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) + plt.close() + + # check for peaks within +- 5keV of 2614 and 583 to ensure blinding still + # valid and if so create file else raise error. if detector is in ac mode it + # will always pass this check + if ( + np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) + ) or det_status is False: + Path(args.output).mkdir(parents=True, exist_ok=True) + Props.write_to( + args.output, + { + "threshold_adc": np.nanmin(daqenergy), + "threshold_kev": np.nanmin(daqenergy_cal), + }, + ) + else: + msg = "peaks not found in daqenergy" + raise RuntimeError(msg) diff --git a/workflow/src/legenddataflow/scripts/par/geds/tcm/pars_tcm_pulser.py b/workflow/src/legenddataflow/scripts/par/geds/tcm/pars_tcm_pulser.py new file mode 100644 index 0000000..ab5f400 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/tcm/pars_tcm_pulser.py @@ -0,0 +1,58 @@ +import argparse +from pathlib import Path + +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from pygama.pargen.data_cleaning import get_tcm_pulser_ids + +from ....log import build_log + + +def par_geds_tcm_pulser() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--configs", help="configs path", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument( + "--pulser_file", help="pulser file", type=str, required=False + ) + + argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_tcm_pulser"] + + build_log(config_dict, args.log) + + kwarg_dict = config_dict["inputs"]["pulser_config"] + kwarg_dict = Props.read_from(kwarg_dict) + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid}" + + if ( + isinstance(args.tcm_files, list) + and args.tcm_files[0].split(".")[-1] == "filelist" + ): + tcm_files = args.tcm_files[0] + with Path(tcm_files).open() as f: + tcm_files = f.read().splitlines() + else: + tcm_files = args.tcm_files + # get pulser mask from tcm files + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") + ) + + Path(args.pulser_file).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.pulser_file, {"idxs": ids.tolist(), "mask": mask.tolist()}) diff --git a/workflow/src/legenddataflow/scripts/par_psp_geds.py b/workflow/src/legenddataflow/scripts/par_psp_geds.py deleted file mode 100644 index e65903c..0000000 --- a/workflow/src/legenddataflow/scripts/par_psp_geds.py +++ /dev/null @@ -1,157 +0,0 @@ -import argparse -import pickle as pkl -from datetime import datetime -from pathlib import Path - -import matplotlib as mpl -import matplotlib.dates as mdates -import matplotlib.pyplot as plt -import numpy as np -from dbetto.catalog import Props -from legendmeta import LegendMetadata - 
-from ..FileKey import ChannelProcKey - -mpl.use("Agg") - - -argparser = argparse.ArgumentParser() -argparser.add_argument( - "--input", help="input files", nargs="*", type=str, required=True -) -argparser.add_argument( - "--output", help="output file", nargs="*", type=str, required=True -) -argparser.add_argument( - "--in_plots", help="input plot files", nargs="*", type=str, required=False -) -argparser.add_argument( - "--out_plots", help="output plot files", nargs="*", type=str, required=False -) -argparser.add_argument( - "--in_obj", help="input object files", nargs="*", type=str, required=False -) -argparser.add_argument( - "--out_obj", help="output object files", nargs="*", type=str, required=False -) - -argparser.add_argument("--log", help="log_file", type=str) -argparser.add_argument("--configs", help="configs", type=str, required=True) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) -args = argparser.parse_args() - -configs = LegendMetadata(args.configs, lazy=True).on( - args.timestamp, system=args.datatype -) -merge_config = Props.read_from( - configs["snakemake_rules"]["pars_psp"]["inputs"]["psp_config"][args.channel] -) - -ave_fields = merge_config["average_fields"] - -# partitions could be different for different channels - do separately for each channel -in_dicts = {} -for file in args.input: - tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp - in_dicts[tstamp] = Props.read_from(file) - -plot_dict = {} -for field in ave_fields: - keys = field.split(".") - vals = [] - for _tstamp, tstamp_dict in in_dicts.items(): - val = tstamp_dict.copy() - for key in keys: - val = val[key] - vals.append(val) - if "dsp" in tstamp_dict: - tmp_dict = tstamp_dict["dsp"] - else: - tmp_dict = {} - tstamp_dict["dsp"] = tmp_dict - for i, key in enumerate(keys): - if i == len(keys) - 1: - tmp_dict[key] = val - else: - if key in tmp_dict: - tmp_dict = tmp_dict[key] - else: - tmp_dict[key] = {} - tmp_dict = tmp_dict[key] - if isinstance(vals[0], str): - if "*" in vals[0]: - unit = vals[0].split("*")[1] - rounding = len(val.split("*")[0].split(".")[-1]) if "." 
in vals[0] else 16 - vals = np.array([float(val.split("*")[0]) for val in vals]) - else: - unit = None - rounding = 16 - else: - vals = np.array(vals) - unit = None - rounding = 16 - - mean_val = np.nan if len(vals[~np.isnan(vals)]) == 0 else np.nanmedian(vals) - mean = f"{round(mean_val, rounding)}*{unit}" if unit is not None else mean_val - - for _tstamp, tstamp_dict in in_dicts.items(): - val = tstamp_dict - for i, key in enumerate(keys): - if i == len(keys) - 1: - val[key] = mean - else: - val = val[key] - - fig = plt.figure() - plt.scatter( - [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in in_dicts], vals - ) - plt.axhline(y=mean_val, color="r", linestyle="-") - plt.xlabel("time") - if unit is not None: - plt.ylabel(f"value {unit}") - else: - plt.ylabel("value") - plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y")) - plt.gcf().autofmt_xdate() - plt.title(field) - plot_dict[field] = fig - plt.close() - -for file in args.output: - tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp - Props.write_to(file, in_dicts[tstamp]) - -if args.out_plots: - for file in args.out_plots: - tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp - if args.in_plots: - for infile in args.in_plots: - if tstamp in infile: - with Path(infile).open("rb") as f: - old_plot_dict = pkl.load(f) - break - old_plot_dict.update({"psp": plot_dict}) - new_plot_dict = old_plot_dict - else: - new_plot_dict = {"psp": plot_dict} - with Path(file).open("wb") as f: - pkl.dump(new_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) - -if args.out_obj: - for file in args.out_obj: - tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp - if args.in_obj: - for infile in args.in_obj: - if tstamp in infile: - with Path(infile).open("rb") as f: - old_obj_dict = pkl.load(f) - break - new_obj_dict = old_obj_dict - else: - new_obj_dict = {} - with Path(file).open("wb") as f: - pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py deleted file mode 100644 index a5310e9..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py +++ /dev/null @@ -1,57 +0,0 @@ -import argparse -import pickle as pkl -from pathlib import Path - -from dbetto import TextDB -from dbetto.catalog import Props -from lgdo import lh5 -from sklearn.svm import SVC - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--log", help="log file", type=str) -argparser.add_argument("--configs", help="config file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) - -argparser.add_argument("--output_file", help="output SVM file", type=str, required=True) -argparser.add_argument("--train_data", help="input data file", type=str, required=True) -argparser.add_argument( - "--train_hyperpars", help="input hyperparameter file", required=True -) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] - -log = build_log(config_dict, args.log) - -# Load files -tb = lh5.read("ml_train/dsp", args.train_data) -log.debug("loaded data") - -hyperpars = Props.read_from(args.train_hyperpars) - -# Define training inputs -dwts_norm = tb["dwt_norm"].nda -labels 
= tb["dc_label"].nda - -log.debug("training model") -# Initialize and train SVM -svm = SVC( - random_state=int(hyperpars["random_state"]), - kernel=hyperpars["kernel"], - decision_function_shape=hyperpars["decision_function_shape"], - class_weight=hyperpars["class_weight"], - C=float(hyperpars["C"]), - gamma=float(hyperpars["gamma"]), -) - -svm.fit(dwts_norm, labels) -log.debug("trained model") - -# Save trained model with pickle -with Path(args.output_file).open("wb") as svm_file: - pkl.dump(svm, svm_file, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py deleted file mode 100644 index a47b653..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py +++ /dev/null @@ -1,148 +0,0 @@ -import argparse -import logging -import pickle as pkl -import time -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from lgdo import Array, Table -from pygama.pargen.dplms_ge_dict import dplms_ge_dict - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) -argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) -argparser.add_argument("--inplots", help="in_plot_path", type=str) -argparser.add_argument("--database", help="database", type=str, required=True) - -argparser.add_argument("--log", help="log_file", type=str) -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) -argparser.add_argument("--lh5_path", help="lh5_path", type=str, required=True) -argparser.add_argument("--plot_path", help="plot_path", type=str) - -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_dplms"] - -log = build_log(config_dict, args.log) - -log = logging.getLogger(__name__) -sto = lh5.LH5Store() - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - -configs = LegendMetadata(args.configs, lazy=True).on( - args.timestamp, system=args.datatype -) -dsp_config = config_dict["inputs"]["proc_chain"][args.channel] - -dplms_json = config_dict["inputs"]["dplms_pars"][args.channel] -dplms_dict = Props.read_from(dplms_json) - -db_dict = Props.read_from(args.database) - -if dplms_dict["run_dplms"] is True: - with Path(args.fft_raw_filelist).open() as f: - fft_files = sorted(f.read().splitlines()) - - t0 = time.time() - log.info("\nLoad fft data") - energies = sto.read(f"{channel}/raw/daqenergy", fft_files)[0] - idxs = np.where(energies.nda == 0)[0] - raw_fft = sto.read( - f"{channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs - )[0] - t1 = time.time() - log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") - - log.info("\nRunning event selection") - peaks_kev = 
np.array(dplms_dict["peaks_kev"]) - kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] - - peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0][ - "peak" - ].nda - ids = np.isin(peaks, peaks_rounded) - peaks = peaks[ids] - idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - - raw_cal = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] - log.info( - f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}" - ) - - if isinstance(dsp_config, (str, list)): - dsp_config = Props.read_from(dsp_config) - - if args.plot_path: - out_dict, plot_dict = dplms_ge_dict( - raw_fft, - raw_cal, - dsp_config, - db_dict, - dplms_dict, - display=1, - ) - if args.inplots: - with Path(args.inplots).open("rb") as r: - inplot_dict = pkl.load(r) - inplot_dict.update({"dplms": plot_dict}) - - else: - out_dict = dplms_ge_dict( - raw_fft, - raw_cal, - dsp_config, - db_dict, - dplms_dict, - ) - - coeffs = out_dict["dplms"].pop("coefficients") - dplms_pars = Table(col_dict={"coefficients": Array(coeffs)}) - out_dict["dplms"]["coefficients"] = ( - f"loadlh5('{args.lh5_path}', '{channel}/dplms/coefficients')" - ) - - log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") -else: - out_dict = {} - dplms_pars = Table(col_dict={"coefficients": Array([])}) - if args.inplots: - with Path(args.inplots).open("rb") as r: - inplot_dict = pkl.load(r) - else: - inplot_dict = {} - -db_dict.update(out_dict) - -Path(args.lh5_path).parent.mkdir(parents=True, exist_ok=True) -sto.write( - Table(col_dict={"dplms": dplms_pars}), - name=channel, - lh5_file=args.lh5_path, - wo_mode="overwrite", -) - -Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.dsp_pars, db_dict) - -if args.plot_path: - Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) - with Path(args.plot_path).open("wb") as f: - pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py deleted file mode 100644 index c059961..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py +++ /dev/null @@ -1,395 +0,0 @@ -import argparse -import pickle as pkl -import time -import warnings -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -import pygama.pargen.energy_optimisation as om # noqa: F401 -import sklearn.gaussian_process.kernels as ker -from dbetto import TextDB -from dbetto.catalog import Props -from dspeed.units import unit_registry as ureg -from legendmeta import LegendMetadata -from pygama.math.distributions import hpge_peak -from pygama.pargen.dsp_optimize import ( - BayesianOptimizer, - run_bayesian_optimisation, - run_one_dsp, -) - -from ..log import build_log - -warnings.filterwarnings(action="ignore", category=RuntimeWarning) -warnings.filterwarnings(action="ignore", category=np.RankWarning) - - -argparser = argparse.ArgumentParser() - -argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) -argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) -argparser.add_argument("--inplots", help="in_plot_path", type=str) - -argparser.add_argument("--log", help="log_file", type=str) -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) - 
-argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument( - "--final_dsp_pars", help="final_dsp_pars", type=str, required=True -) -argparser.add_argument("--qbb_grid_path", help="qbb_grid_path", type=str) -argparser.add_argument("--plot_path", help="plot_path", type=str) - -argparser.add_argument( - "--plot_save_path", help="plot_save_path", type=str, required=False -) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_eopt"] - -log = build_log(config_dict, args.log) - -sto = lh5.LH5Store() -t0 = time.time() - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - -dsp_config = config_dict["inputs"]["processing_chain"][args.channel] -opt_json = config_dict["inputs"]["optimiser_config"][args.channel] - -opt_dict = Props.read_from(opt_json) -db_dict = Props.read_from(args.decay_const) - -if opt_dict.pop("run_eopt") is True: - peaks_kev = np.array(opt_dict["peaks"]) - kev_widths = [tuple(kev_width) for kev_width in opt_dict["kev_widths"]] - - kwarg_dicts_cusp = [] - kwarg_dicts_trap = [] - kwarg_dicts_zac = [] - for peak in peaks_kev: - peak_idx = np.where(peaks_kev == peak)[0][0] - kev_width = kev_widths[peak_idx] - - kwarg_dicts_cusp.append( - { - "parameter": "cuspEmax", - "func": hpge_peak, - "peak": peak, - "kev_width": kev_width, - "bin_width": 5, - } - ) - kwarg_dicts_zac.append( - { - "parameter": "zacEmax", - "func": hpge_peak, - "peak": peak, - "kev_width": kev_width, - "bin_width": 5, - } - ) - kwarg_dicts_trap.append( - { - "parameter": "trapEmax", - "func": hpge_peak, - "peak": peak, - "kev_width": kev_width, - "bin_width": 5, - } - ) - - peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0][ - "peak" - ].nda - ids = np.isin(peaks, peaks_rounded) - peaks = peaks[ids] - idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - - tb_data = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] - - t1 = time.time() - log.info(f"Data Loaded in {(t1-t0)/60} minutes") - - if isinstance(dsp_config, (str, list)): - dsp_config = Props.read_from(dsp_config) - - dsp_config["outputs"] = ["tp_99", "tp_0_est", "dt_eff"] - - init_data = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0) - full_dt = (init_data["tp_99"].nda - init_data["tp_0_est"].nda)[idx_list[-1]] - flat_val = np.ceil(1.1 * np.nanpercentile(full_dt, 99) / 100) / 10 - - if flat_val < 1.0: - flat_val = 1.0 - elif flat_val > 4: - flat_val = 4 - flat_val = f"{flat_val}*us" - - db_dict["cusp"] = {"flat": flat_val} - db_dict["zac"] = {"flat": flat_val} - db_dict["etrap"] = {"flat": flat_val} - - tb_data.add_column("dt_eff", init_data["dt_eff"]) - - dsp_config["processors"].pop("dt_eff") - - dsp_config["outputs"] = ["zacEmax", "cuspEmax", "trapEmax", "dt_eff"] - - kwarg_dict = [ - { - "peak_dicts": kwarg_dicts_cusp, - "ctc_param": "dt_eff", - "idx_list": idx_list, - "peaks_kev": peaks_kev, - }, - { - "peak_dicts": kwarg_dicts_zac, - "ctc_param": "dt_eff", - "idx_list": idx_list, - "peaks_kev": peaks_kev, - }, - { - "peak_dicts": kwarg_dicts_trap, - "ctc_param": "dt_eff", - "idx_list": idx_list, - 
"peaks_kev": peaks_kev, - }, - ] - - fom = eval(opt_dict["fom"]) - out_field = opt_dict["fom_field"] - out_err_field = opt_dict["fom_err_field"] - sample_x = np.array(opt_dict["initial_samples"]) - - results_cusp = [] - results_zac = [] - results_trap = [] - - sample_y_cusp = [] - sample_y_zac = [] - sample_y_trap = [] - - err_y_cusp = [] - err_y_zac = [] - err_y_trap = [] - - for i, x in enumerate(sample_x): - db_dict["cusp"]["sigma"] = f"{x[0]}*us" - db_dict["zac"]["sigma"] = f"{x[0]}*us" - db_dict["etrap"]["rise"] = f"{x[0]}*us" - - log.info(f"Initialising values {i+1} : {db_dict}") - - tb_out = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0) - - res = fom(tb_out, kwarg_dict[0]) - results_cusp.append(res) - sample_y_cusp.append(res[out_field]) - err_y_cusp.append(res[out_err_field]) - - res = fom(tb_out, kwarg_dict[1]) - results_zac.append(res) - sample_y_zac.append(res[out_field]) - err_y_zac.append(res[out_err_field]) - - res = fom(tb_out, kwarg_dict[2]) - results_trap.append(res) - sample_y_trap.append(res[out_field]) - err_y_trap.append(res[out_err_field]) - - log.info(f"{i+1} Finished") - - if np.isnan(sample_y_cusp).all(): - max_cusp = opt_dict["nan_default"] - else: - max_cusp = np.ceil(np.nanmax(sample_y_cusp) * 2) - if np.isnan(sample_y_zac).all(): - max_zac = opt_dict["nan_default"] - else: - max_zac = np.ceil(np.nanmax(sample_y_zac) * 2) - if np.isnan(sample_y_trap).all(): - max_trap = opt_dict["nan_default"] - else: - max_trap = np.ceil(np.nanmax(sample_y_trap) * 2) - - nan_vals = [max_cusp, max_zac, max_trap] - - for i in range(len(sample_x)): - if np.isnan(sample_y_cusp[i]): - results_cusp[i]["y_val"] = max_cusp - sample_y_cusp[i] = max_cusp - - if np.isnan(sample_y_zac[i]): - results_zac[i]["y_val"] = max_zac - sample_y_zac[i] = max_zac - - if np.isnan(sample_y_trap[i]): - results_trap[i]["y_val"] = max_trap - sample_y_trap[i] = max_trap - - kernel = ( - ker.ConstantKernel(2.0, constant_value_bounds="fixed") - + 1.0 * ker.RBF(1.0, length_scale_bounds=[0.5, 2.5]) - + ker.WhiteKernel(noise_level=0.1, noise_level_bounds=(1e-5, 1e1)) - ) - - lambda_param = 5 - sampling_rate = tb_data["waveform_presummed"]["dt"][0] - sampling_unit = ureg.Quantity(tb_data["waveform_presummed"]["dt"].attrs["units"]) - waveform_sampling = sampling_rate * sampling_unit - - bopt_cusp = BayesianOptimizer( - acq_func=opt_dict["acq_func"], - batch_size=opt_dict["batch_size"], - kernel=kernel, - sampling_rate=waveform_sampling, - fom_value=out_field, - fom_error=out_err_field, - ) - bopt_cusp.lambda_param = lambda_param - bopt_cusp.add_dimension("cusp", "sigma", 0.5, 16, True, "us") - - bopt_zac = BayesianOptimizer( - acq_func=opt_dict["acq_func"], - batch_size=opt_dict["batch_size"], - kernel=kernel, - sampling_rate=waveform_sampling, - fom_value=out_field, - fom_error=out_err_field, - ) - bopt_zac.lambda_param = lambda_param - bopt_zac.add_dimension("zac", "sigma", 0.5, 16, True, "us") - - bopt_trap = BayesianOptimizer( - acq_func=opt_dict["acq_func"], - batch_size=opt_dict["batch_size"], - kernel=kernel, - sampling_rate=waveform_sampling, - fom_value=out_field, - fom_error=out_err_field, - ) - bopt_trap.lambda_param = lambda_param - bopt_trap.add_dimension("etrap", "rise", 1, 12, True, "us") - - bopt_cusp.add_initial_values( - x_init=sample_x, y_init=sample_y_cusp, yerr_init=err_y_cusp - ) - bopt_zac.add_initial_values( - x_init=sample_x, y_init=sample_y_zac, yerr_init=err_y_zac - ) - bopt_trap.add_initial_values( - x_init=sample_x, y_init=sample_y_trap, yerr_init=err_y_trap - ) - - 
best_idx = np.nanargmin(sample_y_cusp) - bopt_cusp.optimal_results = results_cusp[best_idx] - bopt_cusp.optimal_x = sample_x[best_idx] - - best_idx = np.nanargmin(sample_y_zac) - bopt_zac.optimal_results = results_zac[best_idx] - bopt_zac.optimal_x = sample_x[best_idx] - - best_idx = np.nanargmin(sample_y_trap) - bopt_trap.optimal_results = results_trap[best_idx] - bopt_trap.optimal_x = sample_x[best_idx] - - optimisers = [bopt_cusp, bopt_zac, bopt_trap] - - out_param_dict, out_results_list = run_bayesian_optimisation( - tb_data, - dsp_config, - [fom], - optimisers, - fom_kwargs=kwarg_dict, - db_dict=db_dict, - nan_val=nan_vals, - n_iter=opt_dict["n_iter"], - ) - - Props.add_to(db_dict, out_param_dict) - - # db_dict.update(out_param_dict) - - t2 = time.time() - log.info(f"Optimiser finished in {(t2-t1)/60} minutes") - - out_alpha_dict = {} - out_alpha_dict["cuspEmax_ctc"] = { - "expression": "cuspEmax*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, - } - - out_alpha_dict["cuspEftp_ctc"] = { - "expression": "cuspEftp*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, - } - - out_alpha_dict["zacEmax_ctc"] = { - "expression": "zacEmax*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, - } - - out_alpha_dict["zacEftp_ctc"] = { - "expression": "zacEftp*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, - } - - out_alpha_dict["trapEmax_ctc"] = { - "expression": "trapEmax*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, - } - - out_alpha_dict["trapEftp_ctc"] = { - "expression": "trapEftp*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, - } - if "ctc_params" in db_dict: - db_dict["ctc_params"].update(out_alpha_dict) - else: - db_dict.update({"ctc_params": out_alpha_dict}) - - Path(args.qbb_grid_path).parent.mkdir(parents=True, exist_ok=True) - with Path(args.qbb_grid_path).open("wb") as f: - pkl.dump(optimisers, f) - -else: - Path(args.qbb_grid_path).touch() - -Path(args.final_dsp_pars).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.final_dsp_pars, db_dict) - -if args.plot_path: - if args.inplots: - with Path(args.inplots).open("rb") as r: - plot_dict = pkl.load(r) - else: - plot_dict = {} - - plot_dict["trap_optimisation"] = { - "kernel_space": bopt_trap.plot(init_samples=sample_x), - "acq_space": bopt_trap.plot_acq(init_samples=sample_x), - } - - plot_dict["cusp_optimisation"] = { - "kernel_space": bopt_cusp.plot(init_samples=sample_x), - "acq_space": bopt_cusp.plot_acq(init_samples=sample_x), - } - - plot_dict["zac_optimisation"] = { - "kernel_space": bopt_zac.plot(init_samples=sample_x), - "acq_space": bopt_zac.plot_acq(init_samples=sample_x), - } - - Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) - with Path(args.plot_path).open("wb") as w: - pkl.dump(plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py deleted file mode 100644 index 7e843e8..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py +++ /dev/null @@ -1,108 +0,0 @@ -import argparse -import pickle as pkl -import time -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -import pygama.pargen.noise_optimization as pno -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import 
LegendMetadata -from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes -from pygama.pargen.dsp_optimize import run_one_dsp - -from ..log import build_log - -sto = lh5.LH5Store() - -argparser = argparse.ArgumentParser() -argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) -argparser.add_argument("--database", help="database", type=str, required=True) -argparser.add_argument("--inplots", help="inplots", type=str) - -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) -argparser.add_argument("--plot_path", help="plot_path", type=str) - -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] - -log = build_log(config_dict, args.log) - - -t0 = time.time() - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - -dsp_config = config_dict["inputs"]["processing_chain"][args.channel] -opt_json = config_dict["inputs"]["optimiser_config"][args.channel] - -opt_dict = Props.read_from(opt_json) -db_dict = Props.read_from(args.database) - -if opt_dict.pop("run_nopt") is True: - with Path(args.raw_filelist).open() as f: - files = f.read().splitlines() - - raw_files = sorted(files) - - energies = sto.read(f"{channel}/raw/daqenergy", raw_files)[0] - idxs = np.where(energies.nda == 0)[0] - tb_data = sto.read( - f"{channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs - )[0] - t1 = time.time() - log.info(f"Time to open raw files {t1-t0:.2f} s, n. baselines {len(tb_data)}") - - log.info(f"Select baselines {len(tb_data)}") - dsp_data = run_one_dsp(tb_data, dsp_config) - cut_dict = generate_cuts(dsp_data, cut_dict=opt_dict.pop("cut_pars")) - cut_idxs = get_cut_indexes(dsp_data, cut_dict) - tb_data = sto.read( - f"{channel}/raw", raw_files, n_rows=opt_dict.pop("n_events"), idx=idxs[cut_idxs] - )[0] - log.info(f"... 
{len(tb_data)} baselines after cuts") - - if isinstance(dsp_config, (str, list)): - dsp_config = Props.read_from(dsp_config) - - if args.plot_path: - out_dict, plot_dict = pno.noise_optimization( - tb_data, dsp_config, db_dict.copy(), opt_dict, channel, display=1 - ) - else: - out_dict = pno.noise_optimization( - raw_files, dsp_config, db_dict.copy(), opt_dict, channel - ) - - t2 = time.time() - log.info(f"Optimiser finished in {(t2-t0)/60} minutes") -else: - out_dict = {} - plot_dict = {} - -if args.plot_path: - Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) - if args.inplots: - with Path(args.inplots).open("rb") as r: - old_plot_dict = pkl.load(r) - plot_dict = dict(noise_optimisation=plot_dict, **old_plot_dict) - else: - plot_dict = {"noise_optimisation": plot_dict} - with Path(args.plot_path).open("wb") as f: - pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) - -Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.dsp_pars, dict(nopt_pars=out_dict, **db_dict)) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_svm_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_svm_geds.py deleted file mode 100644 index 67d8a64..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_svm_geds.py +++ /dev/null @@ -1,20 +0,0 @@ -import argparse -from pathlib import Path - -from dbetto.catalog import Props - -argparser = argparse.ArgumentParser() -argparser.add_argument("--log", help="log file", type=str) -argparser.add_argument("--output_file", help="output par file", type=str, required=True) -argparser.add_argument("--input_file", help="input par file", type=str, required=True) -argparser.add_argument("--svm_file", help="svm file", required=True) -args = argparser.parse_args() - -par_data = Props.read_from(args.input_file) - -file = f"'$_/{Path(args.svm_file).name}'" - -par_data["svm"] = {"model_file": file} - -Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.output_file, par_data) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py deleted file mode 100644 index 1ca084b..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py +++ /dev/null @@ -1,139 +0,0 @@ -import argparse -import pickle as pkl -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids -from pygama.pargen.dsp_optimize import run_one_dsp -from pygama.pargen.extract_tau import ExtractTau - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs path", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--plot_path", help="plot path", type=str, required=False) -argparser.add_argument("--output_file", help="output file", type=str, required=True) - -argparser.add_argument("--pulser_file", help="pulser file", type=str, required=False) - -argparser.add_argument("--raw_files", help="input files", nargs="*", type=str) 
-argparser.add_argument( - "--tcm_files", help="tcm_files", nargs="*", type=str, required=False -) -args = argparser.parse_args() - -sto = lh5.LH5Store() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] - -log = build_log(config_dict, args.log) - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - -channel_dict = config_dict["inputs"]["processing_chain"][args.channel] -kwarg_dict = config_dict["inputs"]["tau_config"][args.channel] - -kwarg_dict = Props.read_from(kwarg_dict) - -if kwarg_dict["run_tau"] is True: - dsp_config = Props.read_from(channel_dict) - kwarg_dict.pop("run_tau") - if ( - isinstance(args.raw_files, list) - and args.raw_files[0].split(".")[-1] == "filelist" - ): - input_file = args.raw_files[0] - with Path(input_file).open() as f: - input_file = f.read().splitlines() - else: - input_file = args.raw_files - - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) - - data = sto.read( - f"{channel}/raw", input_file, field_mask=["daqenergy", "timestamp", "t_sat_lo"] - )[0].view_as("pd") - threshold = kwarg_dict.pop("threshold") - - discharges = data["t_sat_lo"] > 0 - discharge_timestamps = np.where(data["timestamp"][discharges])[0] - is_recovering = np.full(len(data), False, dtype=bool) - for tstamp in discharge_timestamps: - is_recovering = is_recovering | np.where( - ( - ((data["timestamp"] - tstamp) < 0.01) - & ((data["timestamp"] - tstamp) > 0) - ), - True, - False, - ) - cuts = np.where( - (data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering) - )[0] - - tb_data = sto.read( - f"{channel}/raw", - input_file, - idx=cuts, - n_rows=kwarg_dict.pop("n_events"), - )[0] - - tb_out = run_one_dsp(tb_data, dsp_config) - log.debug("Processed Data") - cut_parameters = kwarg_dict.get("cut_parameters", None) - if cut_parameters is not None: - idxs = get_cut_indexes(tb_out, cut_parameters=cut_parameters) - log.debug("Applied cuts") - log.debug(f"{len(idxs)} events passed cuts") - else: - idxs = np.full(len(tb_out), True, dtype=bool) - - tau = ExtractTau(dsp_config, kwarg_dict["wf_field"]) - slopes = tb_out["tail_slope"].nda - log.debug("Calculating pz constant") - - tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) - - if args.plot_path: - Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) - - plot_dict = tau.plot_waveforms_after_correction( - tb_data, "wf_pz", norm_param=kwarg_dict.get("norm_param", "pz_mean") - ) - plot_dict.update(tau.plot_slopes(slopes[idxs])) - - with Path(args.plot_path).open("wb") as f: - pkl.dump({"tau": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) -else: - out_dict = {} - -Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.output_file, tau.output_dict) diff --git a/workflow/src/legenddataflow/scripts/pars_hit_aoe.py b/workflow/src/legenddataflow/scripts/pars_hit_aoe.py deleted file mode 100644 index 7e13ed8..0000000 --- 
a/workflow/src/legenddataflow/scripts/pars_hit_aoe.py +++ /dev/null @@ -1,290 +0,0 @@ -from __future__ import annotations - -import argparse -import pickle as pkl -import warnings -from pathlib import Path -from typing import Callable - -import numpy as np -import pandas as pd -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from pygama.pargen.AoE_cal import * # noqa: F403 -from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak -from pygama.pargen.data_cleaning import get_tcm_pulser_ids -from pygama.pargen.utils import load_data - -from ..convert_np import convert_dict_np_to_float -from ..log import build_log - -warnings.filterwarnings(action="ignore", category=RuntimeWarning) - - -def get_results_dict(aoe_class): - return { - "cal_energy_param": aoe_class.cal_energy_param, - "dt_param": aoe_class.dt_param, - "rt_correction": aoe_class.dt_corr, - "1000-1300keV": aoe_class.timecorr_df.to_dict("index"), - "correction_fit_results": aoe_class.energy_corr_res_dict, - "low_cut": aoe_class.low_cut_val, - "high_cut": aoe_class.high_cut_val, - "low_side_sfs": aoe_class.low_side_sfs.to_dict("index"), - "2_side_sfs": aoe_class.two_side_sfs.to_dict("index"), - } - - -def fill_plot_dict(aoe_class, data, plot_options, plot_dict=None): - if plot_dict is not None: - for key, item in plot_options.items(): - if item["options"] is not None: - plot_dict[key] = item["function"](aoe_class, data, **item["options"]) - else: - plot_dict[key] = item["function"](aoe_class, data) - else: - plot_dict = {} - return plot_dict - - -def aoe_calibration( - data: pd.Dataframe, - cal_dicts: dict, - current_param: str, - energy_param: str, - cal_energy_param: str, - eres_func: Callable, - pdf: Callable = aoe_peak, - selection_string: str = "", - dt_corr: bool = False, - dep_correct: bool = False, - dt_cut: dict | None = None, - high_cut_val: int = 3, - mean_func: Callable = Pol1, - sigma_func: Callable = SigmaFit, - # dep_acc: float = 0.9, - dt_param: str = "dt_eff", - comptBands_width: int = 20, - plot_options: dict | None = None, - debug_mode: bool = False, -): - data["AoE_Uncorr"] = data[current_param] / data[energy_param] - aoe = CalAoE( - cal_dicts=cal_dicts, - cal_energy_param=cal_energy_param, - eres_func=eres_func, - pdf=pdf, - selection_string=selection_string, - dt_corr=dt_corr, - dep_correct=dep_correct, - dt_cut=dt_cut, - dt_param=dt_param, - high_cut_val=high_cut_val, - mean_func=mean_func, - sigma_func=sigma_func, - compt_bands_width=comptBands_width, - debug_mode=debug_mode | args.debug, - ) - - aoe.update_cal_dicts( - { - "AoE_Uncorr": { - "expression": f"{current_param}/{energy_param}", - "parameters": {}, - } - } - ) - - aoe.calibrate(data, "AoE_Uncorr") - log.info("Calibrated A/E") - return ( - cal_dicts, - get_results_dict(aoe), - fill_plot_dict(aoe, data, plot_options), - aoe, - ) - - -argparser = argparse.ArgumentParser() -argparser.add_argument("files", help="files", nargs="*", type=str) -argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) - -argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) -argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) -argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) - -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--log", help="log_file", 
type=str) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) - - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) -argparser.add_argument("--hit_pars", help="hit_pars", type=str) -argparser.add_argument("--aoe_results", help="aoe_results", type=str) - -argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_hit_aoecal"] - -log = build_log(config_dict, args.log) - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - -channel_dict = config_dict["inputs"]["aoecal_config"][args.channel] -kwarg_dict = Props.read_from(channel_dict) - - -ecal_dict = Props.read_from(args.ecal_file) -cal_dict = ecal_dict["pars"] -eres_dict = ecal_dict["results"]["ecal"] - -with Path(args.eres_file).open("rb") as o: - object_dict = pkl.load(o) - -if kwarg_dict["run_aoe"] is True: - kwarg_dict.pop("run_aoe") - - pdf = eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else aoe_peak - - sigma_func = ( - eval(kwarg_dict.pop("sigma_func")) if "sigma_func" in kwarg_dict else SigmaFit - ) - - mean_func = eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else Pol1 - - if "plot_options" in kwarg_dict: - for field, item in kwarg_dict["plot_options"].items(): - kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - - with Path(args.files[0]).open() as f: - files = f.read().splitlines() - files = sorted(files) - - try: - eres = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy() - - def eres_func(x): - return eval(eres["expression"], dict(x=x, **eres["parameters"])) - - except KeyError: - - def eres_func(x): - return x * np.nan - - params = [ - kwarg_dict["current_param"], - "tp_0_est", - "tp_99", - kwarg_dict["energy_param"], - kwarg_dict["cal_energy_param"], - kwarg_dict["cut_field"], - "timestamp", - ] - - if "dt_param" in kwarg_dict: - params += kwarg_dict["dt_param"] - else: - params += "dt_eff" - - if "dt_cut" in kwarg_dict and kwarg_dict["dt_cut"] is not None: - cal_dict.update(kwarg_dict["dt_cut"]["cut"]) - params.append(kwarg_dict["dt_cut"]["out_param"]) - - # load data in - data, threshold_mask = load_data( - files, - f"{channel}/dsp", - cal_dict, - params=params, - threshold=kwarg_dict.pop("threshold"), - return_selection_mask=True, - ) - - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) - - data["is_pulser"] = mask[threshold_mask] - - cal_dict, out_dict, plot_dict, obj = aoe_calibration( - data, - cal_dicts=cal_dict, - eres_func=eres_func, - 
selection_string=f"{kwarg_dict.pop('cut_field')}&(~is_pulser)", - pdf=pdf, - mean_func=mean_func, - sigma_func=sigma_func, - **kwarg_dict, - ) - obj.pdf = obj.pdf.name - - # need to change eres func as can't pickle lambdas - try: - obj.eres_func = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy() - except KeyError: - obj.eres_func = {} -else: - out_dict = {} - plot_dict = {} - obj = None - -if args.plot_file: - common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None - if args.inplots: - with Path(args.inplots).open("rb") as r: - out_plot_dict = pkl.load(r) - out_plot_dict.update({"aoe": plot_dict}) - else: - out_plot_dict = {"aoe": plot_dict} - - if "common" in list(out_plot_dict) and common_dict is not None: - out_plot_dict["common"].update(common_dict) - elif common_dict is not None: - out_plot_dict["common"] = common_dict - - Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) - with Path(args.plot_file).open("wb") as w: - pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - -Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) -results_dict = dict(**ecal_dict["results"], aoe=out_dict) -final_hit_dict = { - "pars": {"operations": cal_dict}, - "results": results_dict, -} - -final_hit_dict = convert_dict_np_to_float(final_hit_dict) - -Props.write_to(args.hit_pars, final_hit_dict) - -Path(args.aoe_results).parent.mkdir(parents=True, exist_ok=True) -final_object_dict = dict( - **object_dict, - aoe=obj, -) -with Path(args.aoe_results).open("wb") as w: - pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_hit_lq.py b/workflow/src/legenddataflow/scripts/pars_hit_lq.py deleted file mode 100644 index a7a2601..0000000 --- a/workflow/src/legenddataflow/scripts/pars_hit_lq.py +++ /dev/null @@ -1,283 +0,0 @@ -from __future__ import annotations - -import argparse -import pickle as pkl -import warnings -from pathlib import Path - -import numpy as np -import pandas as pd -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from pygama.math.distributions import gaussian -from pygama.pargen.AoE_cal import * # noqa: F403 -from pygama.pargen.data_cleaning import get_tcm_pulser_ids -from pygama.pargen.lq_cal import * # noqa: F403 -from pygama.pargen.lq_cal import LQCal -from pygama.pargen.utils import load_data - -from ..convert_np import convert_dict_np_to_float -from ..log import build_log - -warnings.filterwarnings(action="ignore", category=RuntimeWarning) - - -def get_results_dict(lq_class): - return { - "cal_energy_param": lq_class.cal_energy_param, - "DEP_means": lq_class.timecorr_df.to_dict("index"), - "rt_correction": lq_class.dt_fit_pars, - "cut_fit_pars": lq_class.cut_fit_pars.to_dict(), - "cut_value": lq_class.cut_val, - "sfs": lq_class.low_side_sf.to_dict("index"), - } - - -def fill_plot_dict(lq_class, data, plot_options, plot_dict=None): - if plot_dict is not None: - for key, item in plot_options.items(): - if item["options"] is not None: - plot_dict[key] = item["function"](lq_class, data, **item["options"]) - else: - plot_dict[key] = item["function"](lq_class, data) - else: - plot_dict = {} - return plot_dict - - -def lq_calibration( - data: pd.DataFrame, - cal_dicts: dict, - energy_param: str, - cal_energy_param: str, - dt_param: str, - eres_func: callable, - cdf: callable = gaussian, - selection_string: str = "", - plot_options: dict | None = None, - debug_mode: bool = False, -): - """Loads in data from the provided 
files and runs the LQ calibration on said files - - Parameters - ---------- - data: pd.DataFrame - A dataframe containing the data used for calibrating LQ - cal_dicts: dict - A dict of hit-level operations to apply to the data - energy_param: string - The energy parameter of choice. Used for normalizing the - raw lq values - cal_energy_param: string - The calibrated energy parameter of choice - dt_param: string - The drift time parameter of choice - eres_func: callable - The energy resolution functions - cdf: callable - The CDF used for the binned fitting of LQ distributions - selection_string: string - A string of flags to apply to the data when running the calibration - plot_options: dict - A dict containing the plot functions the user wants to run,and any - user options to provide those plot functions - - Returns - ------- - cal_dicts: dict - The user provided dict, updated with hit-level operations for LQ - results_dict: dict - A dict containing the results of the LQ calibration - plot_dict: dict - A dict containing all the figures specified by the plot options - lq: LQCal class - The LQCal object used for the LQ calibration - """ - - lq = LQCal( - cal_dicts, - cal_energy_param, - dt_param, - eres_func, - cdf, - selection_string, - debug_mode=debug_mode | args.debug, - ) - - data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) - - lq.update_cal_dicts( - { - "LQ_Ecorr": { - "expression": f"lq80/{energy_param}", - "parameters": {}, - } - } - ) - - lq.calibrate(data, "LQ_Ecorr") - log.info("Calibrated LQ") - return cal_dicts, get_results_dict(lq), fill_plot_dict(lq, data, plot_options), lq - - -argparser = argparse.ArgumentParser() -argparser.add_argument("files", help="files", nargs="*", type=str) -argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) - -argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) -argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) -argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) - -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) -argparser.add_argument("--hit_pars", help="hit_pars", type=str) -argparser.add_argument("--lq_results", help="lq_results", type=str) - -argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_hit_lqcal"] - -log = build_log(config_dict, args.log) - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - - -channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] -kwarg_dict = Props.read_from(channel_dict) - -ecal_dict = Props.read_from(args.ecal_file) -cal_dict = ecal_dict["pars"]["operations"] -eres_dict = 
ecal_dict["results"]["ecal"] - -with Path(args.eres_file).open("rb") as o: - object_dict = pkl.load(o) - -if kwarg_dict["run_lq"] is True: - kwarg_dict.pop("run_lq") - - cdf = eval(kwarg_dict.pop("cdf")) if "cdf" in kwarg_dict else gaussian - - if "plot_options" in kwarg_dict: - for field, item in kwarg_dict["plot_options"].items(): - kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - - with Path(args.files[0]).open() as f: - files = f.read().splitlines() - files = sorted(files) - - try: - eres = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy() - - def eres_func(x): - return eval(eres["expression"], dict(x=x, **eres["parameters"])) - - except KeyError: - - def eres_func(x): - return x * np.nan - - params = [ - "lq80", - "dt_eff", - kwarg_dict["energy_param"], - kwarg_dict["cal_energy_param"], - kwarg_dict["cut_field"], - ] - - # load data in - data, threshold_mask = load_data( - files, - f"{channel}/dsp", - cal_dict, - params=params, - threshold=kwarg_dict.pop("threshold"), - return_selection_mask=True, - ) - - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) - - data["is_pulser"] = mask[threshold_mask] - - cal_dict, out_dict, plot_dict, obj = lq_calibration( - data, - selection_string=f"{kwarg_dict.pop('cut_field')}&(~is_pulser)", - cal_dicts=cal_dict, - eres_func=eres_func, - cdf=cdf, - **kwarg_dict, - ) - - # need to change eres func as can't pickle lambdas - try: - obj.eres_func = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy() - except KeyError: - obj.eres_func = {} -else: - out_dict = {} - plot_dict = {} - obj = None - -if args.plot_file: - common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None - if args.inplots: - with Path(args.inplots).open("rb") as r: - out_plot_dict = pkl.load(r) - out_plot_dict.update({"lq": plot_dict}) - else: - out_plot_dict = {"lq": plot_dict} - - if "common" in list(out_plot_dict) and common_dict is not None: - out_plot_dict["common"].update(common_dict) - elif common_dict is not None: - out_plot_dict["common"] = common_dict - - Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) - with Path(args.plot_file).open("wb") as w: - pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - - -final_hit_dict = convert_dict_np_to_float( - { - "pars": {"operations": cal_dict}, - "results": dict(**eres_dict, lq=out_dict), - } -) -Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.hit_pars, final_hit_dict) - -final_object_dict = dict( - **object_dict, - lq=obj, -) -Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) -with Path(args.lq_results).open("wb") as w: - pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_tcm_pulser.py b/workflow/src/legenddataflow/scripts/pars_tcm_pulser.py deleted file mode 100644 index ad46f0c..0000000 --- a/workflow/src/legenddataflow/scripts/pars_tcm_pulser.py +++ /dev/null @@ -1,57 +0,0 @@ -import argparse -import 
logging -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from pygama.pargen.data_cleaning import get_tcm_pulser_ids - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs path", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--pulser_file", help="pulser file", type=str, required=False) - -argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_tcm_pulser"] - -log = build_log(config_dict, args.log) - -sto = lh5.LH5Store() -log = logging.getLogger(__name__) - - -kwarg_dict = config_dict["inputs"]["pulser_config"] -kwarg_dict = Props.read_from(kwarg_dict) - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid}" - -if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": - tcm_files = args.tcm_files[0] - with Path(tcm_files).open() as f: - tcm_files = f.read().splitlines() -else: - tcm_files = args.tcm_files -# get pulser mask from tcm files -tcm_files = sorted(np.unique(tcm_files)) -ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") -) - -Path(args.pulser_file).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.pulser_file, {"idxs": ids.tolist(), "mask": mask.tolist()}) diff --git a/workflow/src/legenddataflow/scripts/tier/dsp.py b/workflow/src/legenddataflow/scripts/tier/dsp.py new file mode 100644 index 0000000..906985b --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/dsp.py @@ -0,0 +1,171 @@ +import argparse +import re +import time +import warnings +from pathlib import Path + +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from dspeed import build_dsp +from legendmeta import LegendMetadata +from lgdo import lh5 + +from ...log import build_log + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + + +def replace_list_with_array(dic): + for key, value in dic.items(): + if isinstance(value, dict): + dic[key] = replace_list_with_array(value) + elif isinstance(value, list): + dic[key] = np.array(value, dtype="float32") + else: + pass + return dic + + +def build_tier_dsp() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--configs", help="configs path", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--tier", help="Tier", type=str, required=True) + + argparser.add_argument( + "--pars_file", help="database file for detector", nargs="*", default=[] + ) 
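+    # descriptive note (editor's addition): of the files passed via --pars_file,
+    # only those with .json/.yaml/.yml suffixes are merged into the DSP parameter
+    # database further down in this function; other suffixes are ignored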
+ argparser.add_argument("--input", help="input file", type=str) + + argparser.add_argument("--output", help="output file", type=str) + argparser.add_argument("--db_file", help="db file", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True) + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] + if args.tier in ["dsp", "psp"]: + config_dict = config_dict["tier_dsp"] + elif args.tier in ["ann", "pan"]: + config_dict = config_dict["tier_ann"] + else: + msg = f"Tier {args.tier} not supported" + raise ValueError(msg) + + log = build_log(config_dict, args.log) + + channel_dict = config_dict["inputs"]["processing_chain"] + settings_dict = config_dict["options"].get("settings", {}) + if isinstance(settings_dict, str): + settings_dict = Props.read_from(settings_dict) + + meta = LegendMetadata(path=args.metadata) + chan_map = meta.channelmap(args.timestamp, system=args.datatype) + + if args.tier in ["ann", "pan"]: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/dsp": Props.read_from(file) + for chan, file in channel_dict.items() + } + else: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) + for chan, file in channel_dict.items() + } + db_files = [ + par_file + for par_file in args.pars_file + if Path(par_file).suffix in (".json", ".yaml", ".yml") + ] + + database_dic = Props.read_from(db_files, subst_pathvar=True) + database_dic = replace_list_with_array(database_dic) + + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + + rng = np.random.default_rng() + rand_num = f"{rng.integers(0, 99999):05d}" + temp_output = f"{args.output}.{rand_num}" + + start = time.time() + + build_dsp( + args.input, + temp_output, + {}, + database=database_dic, + chan_config=channel_dict, + write_mode="r", + buffer_len=settings_dict.get("buffer_len", 1000), + block_width=settings_dict.get("block_width", 16), + ) + + log.info(f"build_dsp finished in {time.time()-start}") + Path(temp_output).rename(args.output) + + key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") + + if args.tier in ["dsp", "psp"]: + raw_channels = [ + channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel) + ] + raw_fields = [ + field.split("/")[-1] + for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/") + ] + + outputs = {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, + "dsp": outputs, + }, + "valid_keys": { + key: {"valid_channels": {"raw": raw_channels, "dsp": channels}} + }, + } + else: + outputs = {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "ann": outputs, + }, + "valid_keys": {key: 
{"valid_channels": {"ann": channels}}}, + } + + Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.db_file, full_dict) diff --git a/workflow/src/legenddataflow/scripts/tier/evt.py b/workflow/src/legenddataflow/scripts/tier/evt.py new file mode 100644 index 0000000..15a76d1 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/evt.py @@ -0,0 +1,187 @@ +import argparse +import json +import time +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +from dbetto import Props, TextDB +from legendmeta import LegendMetadata +from lgdo.types import Array +from pygama.evt import build_evt + +from ...log import build_log + +sto = lh5.LH5Store() + + +def find_matching_values_with_delay(arr1, arr2, jit_delay): + matching_values = [] + + # Create an array with all possible delay values + delays = np.arange(0, int(1e9 * jit_delay)) * jit_delay + + for delay in delays: + arr2_delayed = arr2 + delay + + # Find matching values and indices + mask = np.isin(arr1, arr2_delayed, assume_unique=True) + matching_values.extend(arr1[mask]) + + return np.unique(matching_values) + + +def build_tier_evt() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--hit_file", help="hit file", type=str) + argparser.add_argument("--dsp_file", help="dsp file", type=str) + argparser.add_argument("--tcm_file", help="tcm file", type=str) + argparser.add_argument("--ann_file", help="ann file") + argparser.add_argument("--xtc_file", help="xtc file", type=str) + argparser.add_argument("--par_files", help="par files", nargs="*") + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--tier", help="Tier", type=str, required=True) + + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + + argparser.add_argument("--output", help="output file", type=str) + args = argparser.parse_args() + + # load in config + configs = TextDB(args.configs, lazy=True) + if args.tier in ("evt", "pet"): + rule_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "tier_evt" + ] + + else: + msg = "unknown tier" + raise ValueError(msg) + + config_dict = rule_dict["inputs"] + evt_config_file = config_dict["evt_config"] + + log = build_log(rule_dict, args.log) + + meta = LegendMetadata(args.metadata, lazy=True) + chmap = meta.channelmap(args.timestamp) + + evt_config = Props.read_from(evt_config_file) + + if args.datatype in ("phy", "xtc"): + exp_string = evt_config["operations"]["geds___energy"]["expression"] + exp_string = exp_string.replace( + 'xtalk_matrix_filename=""', f'xtalk_matrix_filename="{args.xtc_file}"' + ) + exp_string = exp_string.replace( + 'cal_par_files=""', f"cal_par_files={args.par_files}" + ) + exp_string2 = exp_string.replace( + 'return_mode="energy"', 'return_mode="tcm_index"' + ) + + file_path_config = { + "operations": { + "geds___energy": {"expression": exp_string}, + "_geds___tcm_idx": {"expression": exp_string2}, + } + } + + log.debug(json.dumps(file_path_config, indent=2)) + + Props.add_to(evt_config, file_path_config) + + # block for snakemake to fill in channel lists + for field, dic in evt_config["channels"].items(): + if isinstance(dic, dict): + chans = chmap.map("system", unique=False)[dic["system"]] + if "selectors" in 
dic: + try: + for k, val in dic["selectors"].items(): + chans = chans.map(k, unique=False)[val] + except KeyError: + chans = None + if chans is not None: + chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] + else: + chans = [] + evt_config["channels"][field] = chans + + log.debug(json.dumps(evt_config["channels"], indent=2)) + + t_start = time.time() + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + + file_table = { + "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), + "dsp": (args.dsp_file, "dsp", "ch{}"), + "hit": (args.hit_file, "hit", "ch{}"), + "evt": (None, "evt"), + } + + if args.ann_file is not None: + file_table["ann"] = (args.ann_file, "dsp", "ch{}") + + table = build_evt( + file_table, + evt_config, + ) + + if "muon_config" in config_dict and config_dict["muon_config"] is not None: + muon_config = Props.read_from(config_dict["muon_config"]["evt_config"]) + field_config = Props.read_from(config_dict["muon_config"]["field_config"]) + # block for snakemake to fill in channel lists + for field, dic in muon_config["channels"].items(): + if isinstance(dic, dict): + chans = chmap.map("system", unique=False)[dic["system"]] + if "selectors" in dic: + try: + for k, val in dic["selectors"].items(): + chans = chans.map(k, unique=False)[val] + except KeyError: + chans = None + if chans is not None: + chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] + else: + chans = [] + muon_config["channels"][field] = chans + + trigger_timestamp = table[field_config["ged_timestamp"]["table"]][ + field_config["ged_timestamp"]["field"] + ].nda + if "hardware_tcm_2" in lh5.ls(args.tcm_file): + muon_table = build_evt( + { + "tcm": (args.tcm_file, "hardware_tcm_2", "ch{}"), + "dsp": (args.dsp_file, "dsp", "ch{}"), + "hit": (args.hit_file, "hit", "ch{}"), + "evt": (None, "evt"), + }, + muon_config, + ) + + muon_timestamp = muon_table[field_config["muon_timestamp"]["field"]].nda + muon_tbl_flag = muon_table[field_config["muon_flag"]["field"]].nda + if len(muon_timestamp[muon_tbl_flag]) > 0: + is_muon_veto_triggered = find_matching_values_with_delay( + trigger_timestamp, + muon_timestamp[muon_tbl_flag], + field_config["jitter"], + ) + muon_flag = np.isin(trigger_timestamp, is_muon_veto_triggered) + else: + muon_flag = np.zeros(len(trigger_timestamp), dtype=bool) + else: + muon_flag = np.zeros(len(trigger_timestamp), dtype=bool) + table[field_config["output_field"]["table"]].add_column( + field_config["output_field"]["field"], Array(muon_flag) + ) + + sto.write(obj=table, name="evt", lh5_file=args.output, wo_mode="a") + + t_elap = time.time() - t_start + log.info(f"Done! 
Time elapsed: {t_elap:.2f} sec.") diff --git a/workflow/src/legenddataflow/scripts/tier/hit.py b/workflow/src/legenddataflow/scripts/tier/hit.py new file mode 100644 index 0000000..9fd489f --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/hit.py @@ -0,0 +1,98 @@ +import argparse +import time +from pathlib import Path + +from dbetto.catalog import Props +from legendmeta import LegendMetadata, TextDB +from lgdo import lh5 +from pygama.hit.build_hit import build_hit + +from ...log import build_log + + +def build_tier_hit() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--input", help="input file", type=str) + argparser.add_argument("--pars_file", help="hit pars file", nargs="*") + + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--tier", help="Tier", type=str, required=True) + + argparser.add_argument("--output", help="output file", type=str) + argparser.add_argument("--db_file", help="db file", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True) + if args.tier == "hit" or args.tier == "pht": + config_dict = configs.on(args.timestamp, system=args.datatype)[ + "snakemake_rules" + ]["tier_hit"] + else: + msg = "unknown tier" + raise ValueError(msg) + + log = build_log(config_dict, args.log) + + channel_dict = config_dict["inputs"]["hit_config"] + settings_dict = config_dict["options"].get("settings", {}) + if isinstance(settings_dict, str): + settings_dict = Props.read_from(settings_dict) + + meta = LegendMetadata(path=args.metadata) + chan_map = meta.channelmap(args.timestamp, system=args.datatype) + + pars_dict = Props.read_from(args.pars_file) + pars_dict = {chan: chan_dict["pars"] for chan, chan_dict in pars_dict.items()} + + hit_dict = {} + channels_present = lh5.ls(args.input) + for channel in pars_dict: + chan_pars = pars_dict[channel].copy() + try: + detector = chan_map.map("daq.rawid")[int(channel[2:])].name + if detector in channel_dict: + cfg_dict = Props.read_from(channel_dict[detector]) + Props.add_to(cfg_dict, chan_pars) + chan_pars = cfg_dict + + if channel in channels_present: + hit_dict[f"{channel}/dsp"] = chan_pars + except KeyError: + pass + + t_start = time.time() + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + build_hit(args.input, lh5_tables_config=hit_dict, outfile=args.output) + t_elap = time.time() - t_start + log.info(f"Done! 
Time elapsed: {t_elap:.2f} sec.") + + hit_outputs = {} + hit_channels = [] + for channel, file in channel_dict.items(): + output = Props.read_from(file)["outputs"] + in_dict = False + for entry in hit_outputs: + if hit_outputs[entry]["fields"] == output: + hit_outputs[entry]["channels"].append(channel) + in_dict = True + if in_dict is False: + hit_outputs[f"group{len(list(hit_outputs))+1}"] = { + "channels": [channel], + "fields": output, + } + hit_channels.append(channel) + + key = args.output.replace(f"-tier_{args.tier}.lh5", "") + + full_dict = { + "valid_fields": {args.tier: hit_outputs}, + "valid_keys": {key: {"valid_channels": {args.tier: hit_channels}}}, + } + + Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.db_file, full_dict) diff --git a/workflow/src/legenddataflow/scripts/tier/raw_blind.py b/workflow/src/legenddataflow/scripts/tier/raw_blind.py new file mode 100644 index 0000000..19eb023 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/raw_blind.py @@ -0,0 +1,185 @@ +""" +This script takes in raw data, applies the calibration to the daqenergy +and uses this to blind the data in a window of Qbb +- 25 keV. It copies over all +channels in a raw file, removing those events that fall within the ROI for Ge detectors +that have a daqenergy calibration curve and are not anti-coincidence only (AC). It removes +the whole event from all of the Ge and SiPM channels. + +In the Snakemake dataflow, this script only runs if the checkfile is found on disk, +but this is controlled by the Snakemake flow (presumably an error is thrown if the file +is not found). This script itself does not check for the existence of such a file. +""" + +import argparse +from pathlib import Path + +import numexpr as ne +import numpy as np +from dbetto.catalog import Props +from legendmeta import LegendMetadata, TextDB +from lgdo import lh5 + +from ...log import build_log + + +def build_tier_raw_blind() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--input", help="input file", type=str) + argparser.add_argument("--output", help="output file", type=str) + argparser.add_argument( + "--blind_curve", help="blinding curves file", type=str, required=True, nargs="*" + ) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--configs", help="config file", type=str) + argparser.add_argument("--chan_maps", help="chan map", type=str) + argparser.add_argument("--metadata", help="metadata", type=str) + argparser.add_argument("--log", help="log file", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True) + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "tier_raw" + ] + + build_log(config_dict, args.log) + + hdf_settings = Props.read_from(config_dict["settings"])["hdf5_settings"] + blinding_settings = Props.read_from(config_dict["config"]) + + centroid = blinding_settings["centroid_in_keV"] # keV + width = blinding_settings["width_in_keV"] # keV + + # list of all channels and objects in the raw file + all_channels = lh5.ls(args.input) + + # list of Ge channels and SiPM channels with associated metadata + legendmetadata = LegendMetadata(args.metadata, lazy=True) + ged_channels = ( + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["geds"] + .map("daq.rawid") + ) + spms_channels = ( + legendmetadata.channelmap(args.timestamp) + 
.map("system", unique=False)["spms"] + .map("daq.rawid") + ) + auxs_channels = ( + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["auxs"] + .map("daq.rawid") + ) + blsn_channels = ( + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["bsln"] + .map("daq.rawid") + ) + puls_channels = ( + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["puls"] + .map("daq.rawid") + ) + + store = lh5.LH5Store() + + # rows that need blinding + toblind = np.array([]) + + # first, loop through the Ge detector channels, calibrate them and look for events that should be blinded + for chnum in list(ged_channels): + # skip Ge detectors that are anti-coincidence only or not able to be blinded for some other reason + if ged_channels[chnum]["analysis"]["is_blinded"] is False: + continue + + # load in just the daqenergy for now + daqenergy, _ = store.read(f"ch{chnum}/raw/daqenergy", args.input) + + # read in calibration curve for this channel + blind_curve = Props.read_from(args.blind_curve)[f"ch{chnum}"]["pars"][ + "operations" + ] + + # calibrate daq energy using pre existing curve + daqenergy_cal = ne.evaluate( + blind_curve["daqenergy_cal"]["expression"], + local_dict=dict( + daqenergy=daqenergy, **blind_curve["daqenergy_cal"]["parameters"] + ), + ) + + # figure out which event indices should be blinded + toblind = np.append( + toblind, + np.nonzero(np.abs(np.asarray(daqenergy_cal) - centroid) <= width)[0], + ) + + # remove duplicates + toblind = np.unique(toblind) + + # total number of events (from last Ge channel loaded, should be same for all Ge channels) + allind = np.arange(len(daqenergy)) + + # gets events that should not be blinded + tokeep = allind[np.logical_not(np.isin(allind, toblind))] + + # make some temp file to write the output to before renaming it + rng = np.random.default_rng() + rand_num = f"{rng.integers(0,99999):05d}" + temp_output = f"{args.output}.{rand_num}" + Path(temp_output).parent.mkdir(parents=True, exist_ok=True) + + for channel in all_channels: + try: + chnum = int(channel[2::]) + except ValueError: + # if this isn't an interesting channel, just copy it to the output file + chobj, _ = store.read(channel, args.input, decompress=False) + store.write_object( + chobj, + channel, + lh5_file=temp_output, + wo_mode="w", + **hdf_settings, + ) + continue + + if ( + (chnum not in list(ged_channels)) + and (chnum not in list(spms_channels)) + and (chnum not in list(auxs_channels)) + and (chnum not in list(blsn_channels)) + and (chnum not in list(puls_channels)) + ): + # if this is a PMT or not included for some reason, just copy it to the output file + chobj, _ = store.read(channel + "/raw", args.input, decompress=False) + store.write_object( + chobj, + group=channel, + name="raw", + lh5_file=temp_output, + wo_mode="w", + **hdf_settings, + ) + continue + + # the rest should be the Ge and SiPM channels that need to be blinded + + # read in all of the data but only for the unblinded events + blinded_chobj, _ = store.read( + channel + "/raw", args.input, idx=tokeep, decompress=False + ) + + # now write the blinded data for this channel + store.write_object( + blinded_chobj, + group=channel, + name="raw", + lh5_file=temp_output, + wo_mode="w", + **hdf_settings, + ) + + # rename the temp file + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/tier/raw_fcio.py b/workflow/src/legenddataflow/scripts/tier/raw_fcio.py new file mode 
100644 index 0000000..fefc8a1 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/raw_fcio.py @@ -0,0 +1,72 @@ +import argparse +from copy import deepcopy +from pathlib import Path + +import numpy as np +from daq2lh5 import build_raw +from dbetto import TextDB +from dbetto.catalog import Props + +from ...log import build_log + + +def build_tier_raw_fcio() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("input", help="input file", type=str) + argparser.add_argument("output", help="output file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--configs", help="config file", type=str) + argparser.add_argument("--chan_maps", help="chan map", type=str) + argparser.add_argument("--log", help="log file", type=str) + args = argparser.parse_args() + + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + + config_dict = ( + TextDB(args.configs, lazy=True) + .on(args.timestamp, system=args.datatype) + .snakemake_rules.tier_raw_fcio + ) + + build_log(config_dict, args.log) + + channel_dict = config_dict.inputs + settings = Props.read_from(channel_dict.settings) + channel_dict = channel_dict.out_spec + all_config = Props.read_from(channel_dict.gen_config) + + chmap = ( + TextDB(args.chan_maps, lazy=True).channelmaps.on(args.timestamp).group("system") + ) + + if "geds_config" in channel_dict: + raise NotImplementedError() + + if "spms_config" in channel_dict: + spm_config = Props.read_from(channel_dict.spms_config) + spm_channels = chmap.spms.map("daq.rawid") + + for rawid, chinfo in spm_channels.items(): + cfg_block = deepcopy(spm_config["FCEventDecoder"]["__output_table_name__"]) + cfg_block["key_list"] = [chinfo.daq.fc_channel] + spm_config["FCEventDecoder"][f"ch{rawid:07d}/raw"] = cfg_block + + spm_config["FCEventDecoder"].pop("__output_table_name__") + + Props.add_to(all_config, spm_config) + + if "auxs_config" in channel_dict: + raise NotImplementedError() + + if "muon_config" in channel_dict: + raise NotImplementedError() + + rng = np.random.default_rng() + rand_num = f"{rng.integers(0,99999):05d}" + temp_output = f"{args.output}.{rand_num}" + + build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) + + # rename the temp file + Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/tier/raw_orca.py b/workflow/src/legenddataflow/scripts/tier/raw_orca.py new file mode 100644 index 0000000..00d7751 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/raw_orca.py @@ -0,0 +1,110 @@ +import argparse +import logging +from pathlib import Path + +import numpy as np +from daq2lh5 import build_raw +from dbetto import TextDB +from dbetto.catalog import Props + +from ...log import build_log + + +def build_tier_raw_orca() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("input", help="input file", type=str) + argparser.add_argument("output", help="output file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--configs", help="config file", type=str) + argparser.add_argument("--chan_maps", help="chan map", type=str) + argparser.add_argument("--log", help="log file") + args = argparser.parse_args() + + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + 
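# --- editor's note: illustrative sketch, not part of this patch -------------
# The geds/spms/auxs/muon blocks further down all follow the same recipe:
# take the generic decoder block from gen_config, overwrite its "key_list"
# with the sorted rawids of one detector system from the channel map, and
# merge the result into all_config via Props.add_to before calling build_raw.
# A minimal illustration with plain dicts and made-up decoder/rawid names
# (no legendmeta or daq2lh5 calls; "SomeOrcaDecoder" is hypothetical):

def _fill_key_list_sketch() -> dict:
    gen_config = {"SomeOrcaDecoder": {"geds": {"key_list": []}}}
    system_rawids = {"geds": [1104002, 1104000, 1104001]}  # hypothetical rawids

    decoder = next(iter(gen_config))  # same next(iter(...)) trick used below
    gen_config[decoder]["geds"]["key_list"] = sorted(system_rawids["geds"])
    return gen_config  # ready to be merged into the full out_spec

# -----------------------------------------------------------------------------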
logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + + configs = TextDB(args.configs, lazy=True) + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "tier_raw" + ] + + build_log(config_dict, args.log) + + channel_dict = config_dict["inputs"] + settings = Props.read_from(channel_dict["settings"]) + channel_dict = channel_dict["out_spec"] + all_config = Props.read_from(channel_dict["gen_config"]) + + chmap = TextDB(args.chan_maps, lazy=True) + + if "geds_config" in list(channel_dict): + ged_config = Props.read_from(channel_dict["geds_config"]) + + ged_channels = list( + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["geds"] + .map("daq.rawid") + ) + + ged_config[next(iter(ged_config))]["geds"]["key_list"] = sorted(ged_channels) + Props.add_to(all_config, ged_config) + + if "spms_config" in list(channel_dict): + spm_config = Props.read_from(channel_dict["spms_config"]) + + spm_channels = list( + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["spms"] + .map("daq.rawid") + ) + + spm_config[next(iter(spm_config))]["spms"]["key_list"] = sorted(spm_channels) + Props.add_to(all_config, spm_config) + + if "auxs_config" in list(channel_dict): + aux_config = Props.read_from(channel_dict["auxs_config"]) + aux_channels = list( + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["auxs"] + .map("daq.rawid") + ) + aux_channels += list( + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["puls"] + .map("daq.rawid") + ) + aux_channels += list( + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["bsln"] + .map("daq.rawid") + ) + top_key = next(iter(aux_config)) + aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted( + aux_channels + ) + Props.add_to(all_config, aux_config) + + if "muon_config" in list(channel_dict): + muon_config = Props.read_from(channel_dict["muon_config"]) + muon_channels = list( + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["muon"] + .map("daq.rawid") + ) + top_key = next(iter(muon_config)) + muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted( + muon_channels + ) + Props.add_to(all_config, muon_config) + + rng = np.random.default_rng() + rand_num = f"{rng.integers(0,99999):05d}" + temp_output = f"{args.output}.{rand_num}" + + build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) + + # rename the temp file + Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/tier/skm.py b/workflow/src/legenddataflow/scripts/tier/skm.py new file mode 100644 index 0000000..a698629 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/skm.py @@ -0,0 +1,96 @@ +import argparse + +import awkward as ak +from dbetto import TextDB +from dbetto.catalog import Props +from lgdo import lh5 +from lgdo.types import Array, Struct, Table, VectorOfVectors + +from ...log import build_log + + +def get_all_out_fields(input_table, out_fields, current_field=""): + for key in input_table: + field = input_table[key] + key_string = f"{current_field}.{key}" + if isinstance(field, (Table, Struct)): + get_all_out_fields(field, out_fields, key_string) + else: + if key_string not in out_fields: + out_fields.append(key_string) + return out_fields + + +def build_tier_skm() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--evt_file", help="evt file", required=True) + 
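# --- editor's note: illustrative sketch, not part of this patch -------------
# The "keep_fields" entries processed further down address nested tables with
# dotted paths (e.g. "geds.energy", a hypothetical name); the loops below walk
# those paths through the LGDO Table and rebuild the same nesting in the
# output. The traversal on plain nested dicts reduces to:

def _copy_dotted_field_sketch(src: dict, dst: dict, field: str) -> None:
    *parents, leaf = field.split(".")
    ptr_in, ptr_out = src, dst
    for name in parents:
        ptr_in = ptr_in[name]
        ptr_out = ptr_out.setdefault(name, {})  # create intermediate "tables"
    ptr_out[leaf] = ptr_in[leaf]  # copy the leaf column across

# e.g. _copy_dotted_field_sketch({"geds": {"energy": [1, 2]}}, out := {}, "geds.energy")
# leaves out == {"geds": {"energy": [1, 2]}}
# -----------------------------------------------------------------------------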
argparser.add_argument("--configs", help="configs", required=True) + argparser.add_argument("--datatype", help="datatype", required=True) + argparser.add_argument("--timestamp", help="timestamp", required=True) + argparser.add_argument("--log", help="log file", default=None) + argparser.add_argument("--output", help="output file", required=True) + args = argparser.parse_args() + + # load in config + config_dict = TextDB(args.configs, lazy=True).on( + args.timestamp, system=args.datatype + )["snakemake_rules"]["tier_skm"] + + build_log(config_dict, args.log) + + skm_config_file = config_dict["inputs"]["skm_config"] + evt_filter = Props.read_from(skm_config_file)["evt_filter"] + out_fields = Props.read_from(skm_config_file)["keep_fields"] + + store = lh5.LH5Store() + + evt = lh5.read_as("evt", args.evt_file, "ak") # noqa: F841 + + # remove unwanted events + skm = eval(f"evt[{evt_filter}]") + # make it rectangular and make an LGDO Table + out_table = Table(skm) + + for field in out_fields: + items = field.split(".") + ptr1 = out_table + for item in items[:-1]: + ptr1 = ptr1[item] + + if isinstance(ptr1[items[-1]], Table): + out_fields.remove(field) + out_fields = get_all_out_fields( + ptr1[items[-1]], out_fields, current_field=field + ) + + # remove unwanted columns + out_table_skm = Table(size=len(out_table)) + for field in out_fields: + # table nesting is labeled by '.' in the config + items = field.split(".") + # get to actual nested field recursively + ptr1 = out_table + ptr2 = out_table_skm + for item in items[:-1]: + # make intermediate tables in new table + if item not in ptr2: + ptr2.add_field(item, Table(size=len(out_table))) + # get non-table LGDO recursively + ptr1 = ptr1[item] + ptr2 = ptr2[item] + + # finally add column to new table + if isinstance(ptr1[items[-1]], VectorOfVectors): + ptr2.add_field(items[-1], Array(ak.flatten(ptr1[items[-1]].view_as("ak")))) + else: + ptr2.add_field(items[-1], ptr1[items[-1]]) + attrs = ptr1[items[-1]].attrs + + # forward LGDO attributes + # attrs = evt[field.replace(".", "_")].attrs + for attr, val in attrs.items(): + if attr != "datatype": + ptr2.attrs[attr] = val + + # write-append to disk + store.write(out_table_skm, "skm", args.output, wo_mode="w") diff --git a/workflow/src/legenddataflow/scripts/tier/tcm.py b/workflow/src/legenddataflow/scripts/tier/tcm.py new file mode 100644 index 0000000..6f53b1f --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/tcm.py @@ -0,0 +1,55 @@ +import argparse +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +from daq2lh5.orca import orca_flashcam +from dbetto import TextDB +from dbetto.catalog import Props +from pygama.evt.build_tcm import build_tcm + +from ...log import build_log + + +def build_tier_tcm() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("input", help="input file", type=str) + argparser.add_argument("output", help="output file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--configs", help="config file", type=str) + argparser.add_argument("--log", help="log file", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["tier_tcm"] + + build_log(config_dict, args.log) + + settings = Props.read_from(config_dict["inputs"]["config"]) + + rng = np.random.default_rng() + 
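# --- editor's note: illustrative sketch, not part of this patch -------------
# The loop below buckets every "chNNNNNNN" table by its FlashCam crate id
# (fcid) so that one hardware_tcm_<fcid> is built per crate. The grouping is
# plain Python; with a stand-in for get_fcid (the real one is imported from
# daq2lh5.orca.orca_flashcam above) it reduces to:

def _group_by_fcid_sketch(ch_list: list[str]) -> dict[int, list[str]]:
    def fake_get_fcid(rawid: int) -> int:
        return rawid // 100000  # hypothetical mapping, NOT the daq2lh5 logic

    groups: dict[int, list[str]] = {}
    for ch in ch_list:
        fcid = fake_get_fcid(int(ch[2:]))  # "ch1104000" -> 1104000
        groups.setdefault(fcid, []).append(f"/{ch}/raw")
    return groups

# e.g. _group_by_fcid_sketch(["ch1104000", "ch1104001"]) == {11: ["/ch1104000/raw", "/ch1104001/raw"]}
# -----------------------------------------------------------------------------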
temp_output = f"{args.output}.{rng.integers(0, 99999):05d}" + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + + # get the list of channels by fcid + ch_list = lh5.ls(args.input, "/ch*") + fcid_channels = {} + for ch in ch_list: + key = int(ch[2:]) + fcid = orca_flashcam.get_fcid(key) + if fcid not in fcid_channels: + fcid_channels[fcid] = [] + fcid_channels[fcid].append(f"/{ch}/raw") + + # make a hardware_tcm_[fcid] for each fcid + for fcid, fcid_dict in fcid_channels.items(): + build_tcm( + [(args.input, fcid_dict)], + out_file=temp_output, + out_name=f"hardware_tcm_{fcid}", + wo_mode="o", + **settings, + ) + + Path(temp_output).rename(args.output) From b2b5ec6b98a85daa3f89c1102471d820a5f057eb Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 00:07:27 +0100 Subject: [PATCH 079/101] debugging --- workflow/src/legenddataflow/FileKey.py | 23 ++++++++++++++-- workflow/src/legenddataflow/cal_grouping.py | 29 +++++++++++++-------- 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/workflow/src/legenddataflow/FileKey.py b/workflow/src/legenddataflow/FileKey.py index c11e6e5..1e33b96 100644 --- a/workflow/src/legenddataflow/FileKey.py +++ b/workflow/src/legenddataflow/FileKey.py @@ -5,6 +5,7 @@ import re import string from collections import namedtuple +from itertools import product from pathlib import Path from .patterns import ( @@ -21,10 +22,16 @@ def regex_from_filepattern(filepattern): f = [] + wildcards = [] last = 0 for match in re.compile(r"\{(?P[\w]+)\}").finditer(filepattern): + f.append(re.escape(filepattern[last : match.start()])) wildcard = match.group("name") - f.append(f"(?P={wildcard})") + if wildcard in wildcards: + f.append(f"(?P={wildcard})") + else: + wildcards.append(wildcard) + f.append(f"(?P<{wildcard}>.+)") last = match.end() f.append(re.escape(filepattern[last:])) f.append("$") @@ -101,9 +108,21 @@ def parse_keypart(cls, keypart): return cls(**d) def expand(self, file_pattern, **kwargs): + if isinstance(file_pattern, Path): + file_pattern = file_pattern.as_posix() wildcard_dict = dict(**self._asdict(), **kwargs) + wildcard_dict = { + wildcard: [wildcard_value] + if isinstance(wildcard_value, str) + else wildcard_value + for wildcard, wildcard_value in wildcard_dict.items() + } formatter = string.Formatter() - return [formatter.vformat(file_pattern, (), wildcard_dict)] + result = [] + for combo in product(*wildcard_dict.values()): + substitution = dict(zip(list(wildcard_dict), combo)) + result.append(formatter.vformat(file_pattern, (), substitution)) + return result def get_path_from_filekey(self, pattern, **kwargs): if kwargs is None: diff --git a/workflow/src/legenddataflow/cal_grouping.py b/workflow/src/legenddataflow/cal_grouping.py index 5c19ea7..13145d7 100644 --- a/workflow/src/legenddataflow/cal_grouping.py +++ b/workflow/src/legenddataflow/cal_grouping.py @@ -85,7 +85,7 @@ def get_par_files( ): all_par_files.append(par_file) if channel == "default": - channel = "{channel}" + channel = "{detector}" selected_par_files = [] for par_file in all_par_files: fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) @@ -138,7 +138,7 @@ def get_plt_files( ): all_par_files.append(par_file) if channel == "default": - channel = "{channel}" + channel = "{detector}" selected_par_files = [] for par_file in all_par_files: fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) @@ -184,14 +184,17 @@ def get_log_file( datatype=datatype, name=name, ) - fk = 
ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) - if channel == "default": - fk.channel = "{channel}" + if len(par_files) > 0: + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) + if channel == "default": + fk.channel = "{detector}" + else: + fk.channel = channel + return fk.get_path_from_filekey( + get_pattern_log_channel(self.setup, name, processing_timestamp) + )[0] else: - fk.channel = channel - return fk.get_path_from_filekey( - get_pattern_log_channel(self.setup, name, processing_timestamp) - )[0] + return "/tmp/log.log" def get_timestamp( self, catalog, dataset, channel, tier, experiment="l200", datatype="cal" @@ -205,8 +208,12 @@ def get_timestamp( datatype=datatype, name=None, ) - fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) - return fk.timestamp + + if len(par_files) > 0: + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) + return fk.timestamp + else: + return "20200101T000000Z" def get_wildcard_constraints(self, dataset, channel): if channel == "default": From 16128845edc10e22605a3635203c4a338f376986 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 00:08:41 +0100 Subject: [PATCH 080/101] add channel merge rules --- workflow/rules/channel_merge.smk | 156 +++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) create mode 100644 workflow/rules/channel_merge.smk diff --git a/workflow/rules/channel_merge.smk b/workflow/rules/channel_merge.smk new file mode 100644 index 0000000..f717eda --- /dev/null +++ b/workflow/rules/channel_merge.smk @@ -0,0 +1,156 @@ +from scripts.util.patterns import ( + get_pattern_pars_tmp_channel, + get_pattern_plts_tmp_channel, + get_pattern_plts, + get_pattern_tier, + get_pattern_pars_tmp, + get_pattern_pars, +) +from scripts.util.utils import set_last_rule_name +import inspect + +def build_merge_rules(tier,lh5_merge=False): + rule: + input: + lambda wildcards: get_plt_chanlist( + setup, + f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", + tier, + basedir, + det_status, + chan_maps, + ), + params: + timestamp="{timestamp}", + datatype="cal", + output: + get_pattern_plts(setup, tier), + group: + f"merge-{tier}" + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + "--channelmap {meta} " + + set_last_rule_name(workflow, f"build_plts_{tier}") + + rule: + input: + lambda wildcards: get_par_chanlist( + setup, + f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", + tier, + basedir, + det_status, + chan_maps, + name="objects", + extension="pkl", + ), + output: + get_pattern_pars( + setup, + tier, + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + group: + f"merge-{tier}" + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " + + set_last_rule_name(workflow, f"build_pars_{tier}_objects") + + if lh5_merge is True: + rule: + input: + lambda wildcards: get_par_chanlist( + setup, + f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", + tier, + basedir, + det_status, + chan_maps, + ), + params: + timestamp="{timestamp}", + datatype="cal", + output: + temp( + get_pattern_pars_tmp( + setup, + tier, + datatype="cal", + ) + ), + group: + f"merge-{tier}" + shell: + "{swenv} python3 -B " + 
"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " + + set_last_rule_name(workflow, f"build_pars_{tier}_db") + + rule: + input: + in_files=lambda wildcards: get_par_chanlist( + setup, + f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", + tier, + basedir, + det_status, + chan_maps, + extension="lh5" if lh5_merge is True else inspect.signature(get_par_chanlist).parameters['extension'].default, + ), + in_db=get_pattern_pars_tmp( + setup, + "dsp", + datatype="cal", + ) if lh5_merge is True else None, + plts=get_pattern_plts(setup, "dsp"), + objects=get_pattern_pars( + setup, + "dsp", + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + params: + timestamp="{timestamp}", + datatype="cal", + output: + out_file=get_pattern_pars( + setup, + tier, + extension="lh5" if lh5_merge is True else inspect.signature(get_pattern_pars).parameters['extension'].default, + check_in_cycle=check_in_cycle, + ), + out_db=get_pattern_pars(setup, tier, check_in_cycle=check_in_cycle) if lh5_merge is True else None, + group: + f"merge-{tier}" + run: + shell_cmd = "{swenv} python3 -B " + shell_cmd += "{basedir}/../scripts/merge_channels.py " + shell_cmd += "--output {output.out_file} " + shell_cmd += "--input {input.in_files} " + shell_cmd += "--timestamp {params.timestamp} " + shell_cmd += "--channelmap {meta} " + if lh5_merge is True: + shell_cmd +="--in_db {input.in_db} " + shell_cmd +="--out_db {output.out_db} " + shell( + shell_cmd + ) + + set_last_rule_name(workflow, f"build_pars_{tier}") From 9be9e0983b0e0ff5843d9186c220332bf64294b5 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 17:49:59 +0100 Subject: [PATCH 081/101] update to script execs --- workflow/rules/ann.smk | 7 ++--- workflow/rules/blinding_calibration.smk | 10 +++---- workflow/rules/blinding_check.smk | 10 +++---- workflow/rules/chanlist_gen.smk | 2 +- workflow/rules/channel_merge.smk | 33 ++++++++++---------- workflow/rules/dsp.smk | 16 ++++------ workflow/rules/dsp_pars_geds.smk | 22 +++++--------- workflow/rules/evt.smk | 11 ++++--- workflow/rules/hit.smk | 25 ++++++---------- workflow/rules/pht.smk | 40 +++++++++---------------- workflow/rules/pht_fast.smk | 7 ++--- workflow/rules/psp.smk | 16 ++++------ workflow/rules/psp_pars_geds.smk | 13 ++++---- workflow/rules/qc_phy.smk | 13 ++++---- workflow/rules/raw.smk | 10 +++---- workflow/rules/skm.smk | 4 +-- workflow/rules/tcm.smk | 7 ++--- 17 files changed, 98 insertions(+), 148 deletions(-) diff --git a/workflow/rules/ann.smk b/workflow/rules/ann.smk index 7a50005..8e7429f 100644 --- a/workflow/rules/ann.smk +++ b/workflow/rules/ann.smk @@ -9,6 +9,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script rule build_ann: @@ -29,8 +30,7 @@ rule build_ann: runtime=300, mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_dsp.py " + f'{execenv_smk_py_script(config, "build_tier_dsp")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -61,8 +61,7 @@ rule build_pan: runtime=300, mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_dsp.py " + f'{execenv_smk_py_script(config, "build_tier_dsp")}' "--log {log} " "--configs {configs} " "--metadata {meta} " diff --git 
a/workflow/rules/blinding_calibration.smk b/workflow/rules/blinding_calibration.smk index 8407893..1a69313 100644 --- a/workflow/rules/blinding_calibration.smk +++ b/workflow/rules/blinding_calibration.smk @@ -12,6 +12,7 @@ from legenddataflow.patterns import ( get_pattern_log_channel, ) from pathlib import Path +from legenddataflow.execenv import execenv_smk_py_script rule build_blinding_calibration: @@ -37,8 +38,7 @@ rule build_blinding_calibration: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/blinding_calibration.py " + f'{execenv_smk_py_script(config, "par_geds_raw_blindcal")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -66,8 +66,7 @@ rule build_plts_blinding: group: "merge-blindcal" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " @@ -89,7 +88,6 @@ rule build_pars_blinding: group: "merge-blindcal" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input.infiles} " "--output {output} " diff --git a/workflow/rules/blinding_check.smk b/workflow/rules/blinding_check.smk index 916009f..bd9b796 100644 --- a/workflow/rules/blinding_check.smk +++ b/workflow/rules/blinding_check.smk @@ -12,6 +12,7 @@ from legenddataflow.patterns import ( get_pattern_plts, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script from pathlib import Path @@ -38,8 +39,7 @@ rule build_blinding_check: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/check_blinding.py " + f'{execenv_smk_py_script(config, "par_geds_raw_blindcheck")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -67,8 +67,7 @@ rule build_plts_raw: group: "merge-raw" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " @@ -92,5 +91,4 @@ rule build_pars_raw: group: "merge-raw" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' diff --git a/workflow/rules/chanlist_gen.smk b/workflow/rules/chanlist_gen.smk index 06200e3..b6a3ea8 100644 --- a/workflow/rules/chanlist_gen.smk +++ b/workflow/rules/chanlist_gen.smk @@ -23,7 +23,7 @@ def get_chanlist(setup, keypart, workflow, config, det_status, chan_maps): f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = "create_chankeylist" # execenv_smk_py_script(workflow, config, )[0] + cmd = execenv_smk_py_script(config, "create_chankeylist") cmd += f" --det_status {det_status} --channelmap {chan_maps} --timestamp {key.timestamp} " cmd += f"--datatype cal --output_file {output_file}" os.system(cmd) diff --git a/workflow/rules/channel_merge.smk b/workflow/rules/channel_merge.smk index f717eda..21ebf25 100644 --- a/workflow/rules/channel_merge.smk +++ b/workflow/rules/channel_merge.smk @@ -8,6 +8,7 @@ from scripts.util.patterns import ( ) from scripts.util.utils import set_last_rule_name import inspect +from legenddataflow.execenv import execenv_smk_py_script def build_merge_rules(tier,lh5_merge=False): rule: @@ -28,8 +29,7 @@ def build_merge_rules(tier,lh5_merge=False): group: f"merge-{tier}" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + 
f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--channelmap {meta} " @@ -59,8 +59,7 @@ def build_merge_rules(tier,lh5_merge=False): group: f"merge-{tier}" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -93,8 +92,7 @@ def build_merge_rules(tier,lh5_merge=False): group: f"merge-{tier}" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + execenv_smk_py_script(config, "merge_channels") "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -140,17 +138,18 @@ def build_merge_rules(tier,lh5_merge=False): group: f"merge-{tier}" run: - shell_cmd = "{swenv} python3 -B " - shell_cmd += "{basedir}/../scripts/merge_channels.py " - shell_cmd += "--output {output.out_file} " - shell_cmd += "--input {input.in_files} " - shell_cmd += "--timestamp {params.timestamp} " - shell_cmd += "--channelmap {meta} " - if lh5_merge is True: - shell_cmd +="--in_db {input.in_db} " - shell_cmd +="--out_db {output.out_db} " - shell( - shell_cmd + shell_string = ( + execenv_smk_py_script(config, "merge_channels") + "--output {output.out_file} " + "--input {input.in_files} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " ) + if lh5_merge is True: + shell_string += ( + "--in_db {input.in_db} " + "--out_db {output.out_db} " + ) + shell(shell_string) set_last_rule_name(workflow, f"build_pars_{tier}") diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index 8340dc8..a386c86 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -15,6 +15,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script dsp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -47,8 +48,7 @@ rule build_plts_dsp: group: "merge-dsp" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--channelmap {meta} " @@ -80,8 +80,7 @@ rule build_pars_dsp_objects: group: "merge-dsp" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -112,8 +111,7 @@ rule build_pars_dsp_db: group: "merge-dsp" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -159,8 +157,7 @@ rule build_pars_dsp: group: "merge-dsp" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--output {output.out_file} " "--in_db {input.in_db} " "--out_db {output.out_db} " @@ -192,8 +189,7 @@ rule build_dsp: runtime=300, mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_dsp.py " + f'{execenv_smk_py_script(config, "build_tier_dsp")}' "--log {log} " "--tier dsp " f"--configs {ro(configs)} " diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index 9fe1391..52f5cf6 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -13,6 +13,7 @@ from legenddataflow.patterns import ( get_pattern_log, 
get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script dsp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -41,8 +42,7 @@ rule build_pars_dsp_tau_geds: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_tau_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_tau")}' "--configs {configs} " "--log {log} " "--datatype {params.datatype} " @@ -76,8 +76,7 @@ rule build_pars_evtsel_geds: runtime=300, mem_swap=70, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_evtsel_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_evtsel")}' "--configs {configs} " "--log {log} " "--datatype {params.datatype} " @@ -114,8 +113,7 @@ rule build_pars_dsp_nopt_geds: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_nopt_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_nopt")}' "--database {input.database} " "--configs {configs} " "--log {log} " @@ -154,8 +152,7 @@ rule build_pars_dsp_dplms_geds: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_dplms_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_dplms")}' "--fft_raw_filelist {input.fft_files} " "--peak_file {input.peak_file} " "--database {input.database} " @@ -193,8 +190,7 @@ rule build_pars_dsp_eopt_geds: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_eopt_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_eopt")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -225,8 +221,7 @@ rule build_svm_dsp_geds: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_build_svm_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_svm_build")}' "--log {log} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " @@ -246,8 +241,7 @@ rule build_pars_dsp_svm_geds: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_svm_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_svm")}' "--log {log} " "--input_file {input.dsp_pars} " "--output_file {output.dsp_pars} " diff --git a/workflow/rules/evt.smk b/workflow/rules/evt.smk index bb90ce8..1bcb2a4 100644 --- a/workflow/rules/evt.smk +++ b/workflow/rules/evt.smk @@ -9,6 +9,7 @@ from legenddataflow.patterns import ( get_pattern_pars, get_pattern_log_concat, ) +from legenddataflow.execenv import execenv_smk_py_script rule build_evt: @@ -43,8 +44,7 @@ rule build_evt: mem_swap=50, run: shell_string = ( - f"{swenv} python3 -B " - f"{basedir}/../scripts/build_evt.py " + f'{execenv_smk_py_script(config, "build_tier_evt")}' f"--configs {ro(configs)} " f"--metadata {ro(meta)} " "--log {log} " @@ -96,8 +96,7 @@ rule build_pet: mem_swap=50, run: shell_string = ( - f"{swenv} python3 -B " - f"{basedir}/../scripts/build_evt.py " + f'{execenv_smk_py_script(config, "build_tier_evt")}' f"--configs {ro(configs)} " f"--metadata {ro(meta)} " "--log {log} " @@ -139,14 +138,14 @@ for evt_tier in ("evt", "pet"): params: timestamp="all", datatype="{datatype}", - lh5concat_exe=config["paths"]["install"] + "/bin/lh5concat", ro_input=lambda _, input: utils.as_ro(config, input), log: get_pattern_log_concat(config, f"tier_{evt_tier}_concat", time), group: "tier-evt" shell: - "{swenv} {params.lh5concat_exe} --verbose --overwrite " + f'{execenv_smk_py_script(config, "lh5concat")}' + "--verbose --overwrite " "--output {output} " "-- {params.ro_input} &> {log}" diff --git 
a/workflow/rules/hit.smk b/workflow/rules/hit.smk index 9ae6f77..aaa1cf5 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -20,6 +20,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script hit_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -59,8 +60,7 @@ rule build_qc: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_hit_qc.py " + f'{execenv_smk_py_script(config, "par_geds_hit_qc")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -108,8 +108,7 @@ rule build_energy_calibration: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_hit_ecal.py " + f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -157,8 +156,7 @@ rule build_aoe_calibration: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_hit_aoe.py " + f'{execenv_smk_py_script(config, "par_geds_hit_aoe")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -204,8 +202,7 @@ rule build_lq_calibration: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_hit_lq.py " + f'{execenv_smk_py_script(config, "par_geds_hit_lq")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -247,8 +244,7 @@ rule build_pars_hit_objects: group: "merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {params.ro_input} " "--output {output} " "--channelmap {meta} " @@ -271,8 +267,7 @@ rule build_plts_hit: group: "merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {params.ro_input} " "--output {output} " "--channelmap {meta} " @@ -303,8 +298,7 @@ rule build_pars_hit: group: "merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {params.ro_input[infiles]} " "--output {output} " "--channelmap {meta} " @@ -331,8 +325,7 @@ rule build_hit: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_hit.py " + f'{execenv_smk_py_script(config, "build_tier_hit")}' f"--configs {ro(configs)} " "--metadata {meta} " "--log {log} " diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index dd1deb4..1b792c6 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -20,6 +20,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script pht_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -126,8 +127,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 30, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_qc.py " + f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -179,8 +179,7 @@ rule build_pht_qc: mem_swap=60, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_qc.py " + f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -241,8 +240,7 @@ rule build_per_energy_calibration: resources: runtime=300, shell: - "{swenv} python3 -B " - 
"{basedir}/../scripts/pars_hit_ecal.py " + f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -354,8 +352,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_partcal.py " + f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -415,8 +412,7 @@ rule build_pht_energy_super_calibrations: mem_swap=60, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_partcal.py " + f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -537,8 +533,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_aoecal.py " + f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -598,8 +593,7 @@ rule build_pht_aoe_calibrations: mem_swap=60, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_aoecal.py " + f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -718,8 +712,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_lqcal.py " + f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -774,8 +767,7 @@ rule build_pht_lq_calibration: mem_swap=60, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_lqcal.py " + f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -826,8 +818,7 @@ rule build_pars_pht_objects: group: "merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " @@ -847,8 +838,7 @@ rule build_plts_pht: group: "merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " @@ -876,8 +866,7 @@ rule build_pars_pht: group: "merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input.infiles} " "--output {output} " @@ -903,8 +892,7 @@ rule build_pht: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_hit.py " + f'{execenv_smk_py_script(config, "build_tier_hit")}' f"--configs {ro(configs)} " "--metadata {meta} " "--log {log} " diff --git a/workflow/rules/pht_fast.smk b/workflow/rules/pht_fast.smk index 75f7a47..2379753 100644 --- a/workflow/rules/pht_fast.smk +++ b/workflow/rules/pht_fast.smk @@ -11,6 +11,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script pht_fast_rules = {} @@ -105,8 +106,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 12, runtime=300, shell: - "{swenv} python3 -B " - f"{basedir}/../scripts/pars_pht_fast.py " + 
f'{execenv_smk_py_script(config, "par_geds_pht_fast")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -164,8 +164,7 @@ rule par_pht_fast: mem_swap=50, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_fast.py " + f'{execenv_smk_py_script(config, "par_geds_pht_fast")}' "--log {log} " "--configs {configs} " "--metadata {meta} " diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index 456d30a..990f186 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -14,6 +14,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -51,8 +52,7 @@ rule build_pars_psp_objects: group: "merge-psp" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--channelmap {meta} " @@ -73,8 +73,7 @@ rule build_plts_psp: group: "merge-psp" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--channelmap {meta} " @@ -101,8 +100,7 @@ rule build_pars_psp_db: group: "merge-psp" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--channelmap {meta} " @@ -144,8 +142,7 @@ rule build_pars_psp: group: "merge-psp" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--output {output.out_file} " "--in_db {input.in_db} " "--out_db {output.out_db} " @@ -176,8 +173,7 @@ rule build_psp: runtime=300, mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_dsp.py " + f'{execenv_smk_py_script(config, "build_tier_dsp")}' "--log {log} " "--tier psp " f"--configs {ro(configs)} " diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk index 6ac7e05..10d9ab1 100644 --- a/workflow/rules/psp_pars_geds.smk +++ b/workflow/rules/psp_pars_geds.smk @@ -14,6 +14,7 @@ from legenddataflow.patterns import ( get_pattern_pars, get_pattern_tier, ) +from legenddataflow.execenv import execenv_smk_py_script psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -95,8 +96,7 @@ for key, dataset in part.datasets.items(): resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/par_psp_geds.py " + f'{execenv_smk_py_script(config, "par_geds_psp_average")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -141,8 +141,7 @@ rule build_par_psp: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/par_psp.py " + f'{execenv_smk_py_script(config, "par_geds_psp_average")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -183,8 +182,7 @@ rule build_svm_psp: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_build_svm_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_svm_build")}' "--log {log} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " @@ -204,8 +202,7 @@ rule build_pars_psp_svm: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_svm_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_svm")}' "--log {log} " 
"--input_file {input.dsp_pars} " "--output_file {output.dsp_pars} " diff --git a/workflow/rules/qc_phy.smk b/workflow/rules/qc_phy.smk index 522eb45..a5cd954 100644 --- a/workflow/rules/qc_phy.smk +++ b/workflow/rules/qc_phy.smk @@ -11,6 +11,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script intier = "psp" @@ -66,8 +67,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 20, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_qc_phy.py " + f'{execenv_smk_py_script(config, "par_geds_pht_qc_phy")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -108,8 +108,7 @@ rule build_pht_qc_phy: mem_swap=60, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_qc_phy.py " + f'{execenv_smk_py_script(config, "par_geds_pht_qc_phy")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -147,8 +146,7 @@ rule build_plts_pht_phy: group: "merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " @@ -170,7 +168,6 @@ rule build_pars_pht_phy: group: "merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input.infiles} " "--output {output} " diff --git a/workflow/rules/raw.smk b/workflow/rules/raw.smk index 311c14c..b0040fd 100644 --- a/workflow/rules/raw.smk +++ b/workflow/rules/raw.smk @@ -7,6 +7,7 @@ from legenddataflow.patterns import ( ) from legenddataflow.utils import set_last_rule_name from legenddataflow.create_pars_keylist import ParsKeyResolve +from legenddataflow.execenv import execenv_smk_py_script raw_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -39,8 +40,7 @@ rule build_raw_orca: mem_swap=110, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}" + f"/../scripts/build_raw_orca.py " + f'{execenv_smk_py_script(config, "build_tier_raw_orca")}' "--log {log} " f"--configs {ro(configs)} " f"--chan_maps {ro(chan_maps)} " @@ -69,8 +69,7 @@ rule build_raw_fcio: mem_swap=110, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}" + f"/../scripts/build_raw_fcio.py " + f'{execenv_smk_py_script(config, "build_tier_raw_fcio")}' "--log {log} " f"--configs {ro(configs)} " f"--chan_maps {ro(chan_maps)} " @@ -105,8 +104,7 @@ rule build_raw_blind: mem_swap=110, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_raw_blind.py " + f'{execenv_smk_py_script(config, "build_tier_raw_blind")}' "--log {log} " f"--configs {ro(configs)} " f"--chan_maps {ro(chan_maps)} " diff --git a/workflow/rules/skm.smk b/workflow/rules/skm.smk index 91a8755..7a4a686 100644 --- a/workflow/rules/skm.smk +++ b/workflow/rules/skm.smk @@ -8,6 +8,7 @@ from legenddataflow.patterns import ( get_pattern_pars, get_pattern_log_concat, ) +from legenddataflow.execenv import execenv_smk_py_script rule build_skm: @@ -26,8 +27,7 @@ rule build_skm: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_skm.py " + f'{execenv_smk_py_script(config, "build_tier_skm")}' f"--configs {ro(configs)} " "--timestamp {params.timestamp} " "--log {log} " diff --git a/workflow/rules/tcm.smk b/workflow/rules/tcm.smk index 9d80d1b..afb080c 100644 --- a/workflow/rules/tcm.smk +++ b/workflow/rules/tcm.smk @@ -8,6 +8,7 @@ from legenddataflow.patterns 
import ( get_pattern_pars_tmp_channel, get_pattern_log_channel, ) +from legenddataflow.execenv import execenv_smk_py_script # This rule builds the tcm files each raw file @@ -28,8 +29,7 @@ rule build_tier_tcm: runtime=300, mem_swap=20, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_tcm.py " + f'{execenv_smk_py_script(config, "build_tier_tcm")}' "--log {log} " f"--configs {ro(configs)} " "--datatype {params.datatype} " @@ -57,8 +57,7 @@ rule build_pulser_ids: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_tcm_pulser.py " + f'{execenv_smk_py_script(config, "par_geds_tcm_pulser")}' "--log {log} " f"--configs {ro(configs)} " "--datatype {params.datatype} " From 16e511be9a53f3be4c84b6425ad40bdf32661263 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 17:51:43 +0100 Subject: [PATCH 082/101] some renames --- .../scripts/par/geds/dsp/dplms.py | 4 +--- .../{blinding_calibration.py => blindcal.py} | 16 +++++----------- .../raw/{check_blinding.py => blindcheck.py} | 17 ++++++++--------- .../geds/tcm/{pars_tcm_pulser.py => pulser.py} | 0 4 files changed, 14 insertions(+), 23 deletions(-) rename workflow/src/legenddataflow/scripts/par/geds/raw/{blinding_calibration.py => blindcal.py} (88%) rename workflow/src/legenddataflow/scripts/par/geds/raw/{check_blinding.py => blindcheck.py} (90%) rename workflow/src/legenddataflow/scripts/par/geds/tcm/{pars_tcm_pulser.py => pulser.py} (100%) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py index 1bea45d..2b0004b 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py @@ -48,9 +48,7 @@ def par_geds_dsp_dplms() -> None: channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - configs = LegendMetadata(args.configs, lazy=True).on( - args.timestamp, system=args.datatype - ) + configs = TextDB(args.configs).on(args.timestamp, system=args.datatype) dsp_config = config_dict["inputs"]["proc_chain"][args.channel] dplms_json = config_dict["inputs"]["dplms_pars"][args.channel] diff --git a/workflow/src/legenddataflow/scripts/par/geds/raw/blinding_calibration.py b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py similarity index 88% rename from workflow/src/legenddataflow/scripts/par/geds/raw/blinding_calibration.py rename to workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py index 8af27a2..a937458 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/raw/blinding_calibration.py +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py @@ -13,6 +13,7 @@ import matplotlib.pyplot as plt import numpy as np from dbetto.catalog import Props +from legendmeta import LegendMetadata from lgdo import lh5 from pygama.pargen.energy_cal import HPGeCalibration @@ -44,25 +45,18 @@ def par_geds_raw_blindcal() -> None: logging.getLogger("matplotlib").setLevel(logging.INFO) log = logging.getLogger(__name__) - # load in channel map - # meta = LegendMetadata(args.meta, lazy=True) - - # chmap = meta.channelmap(args.timestamp) - # if chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["is_blinded"] is True: + meta = LegendMetadata(path=args.meta) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" # peaks to search for peaks_keV = np.array( [238, 583.191, 727.330, 860.564, 1592.53, 
1620.50, 2103.53, 2614.50] ) - E_uncal = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[0].view_as( - "np" - ) + E_uncal = lh5.read(f"{channel}/raw/daqenergy", sorted(args.files))[0].view_as("np") E_uncal = E_uncal[E_uncal > 200] guess_keV = 2620 / np.nanpercentile(E_uncal, 99) # usual simple guess - # Euc_min = peaks_keV[0] / guess_keV * 0.6 - # Euc_max = peaks_keV[-1] / guess_keV * 1.1 - # dEuc = 1 / guess_keV # daqenergy is an int so use integer binning (dx used to be bugged as output so switched to nbins) diff --git a/workflow/src/legenddataflow/scripts/par/geds/raw/check_blinding.py b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py similarity index 90% rename from workflow/src/legenddataflow/scripts/par/geds/raw/check_blinding.py rename to workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py index 4a8f53c..7f645c1 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/raw/check_blinding.py +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py @@ -48,20 +48,19 @@ def par_geds_raw_blindcheck() -> None: log = build_log(config_dict, args.log) # get the usability status for this channel - chmap = ( - LegendMetadata(args.metadata, lazy=True) - .channelmap(args.timestamp) - .map("daq.rawid") + chmap = LegendMetadata(args.metadata, lazy=True).channelmap( + args.timestamp, system=args.datatype ) - det_status = chmap[int(args.channel[2:])]["analysis"]["is_blinded"] + channel = f"ch{chmap[args.channel].daq.rawid:07}" + det_status = chmap[args.channel]["analysis"]["is_blinded"] # read in calibration curve for this channel - blind_curve = Props.read_from(args.blind_curve)[args.channel]["pars"]["operations"] + blind_curve = Props.read_from(args.blind_curve)[channel]["pars"]["operations"] # load in the data - daqenergy = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[ - 0 - ].view_as("np") + daqenergy = lh5.read(f"{channel}/raw/daqenergy", sorted(args.files))[0].view_as( + "np" + ) # calibrate daq energy using pre existing curve daqenergy_cal = ne.evaluate( diff --git a/workflow/src/legenddataflow/scripts/par/geds/tcm/pars_tcm_pulser.py b/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py similarity index 100% rename from workflow/src/legenddataflow/scripts/par/geds/tcm/pars_tcm_pulser.py rename to workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py From 0eab4481033be15ab05d3e5a5071028b10ac489b Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 17:52:07 +0100 Subject: [PATCH 083/101] add exec scripts --- pyproject.toml | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 807e71b..86f7d5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,14 +84,43 @@ docs = [ [project.scripts] dataprod = "legenddataflow.execenv:dataprod" +create_chankeylist = "legenddataflow.scripts.create_chankeylist:create_chankeylist" +merge_channels = "legenddataflow.scripts.merge_channels:merge_channels" +build_filedb = "legenddataflow.scripts.build_filedb:build_filedb" +build_tier_dsp = "legenddataflow.scripts.tier.dsp:build_tier_dsp" +build_tier_evt = "legenddataflow.scripts.tier.evt:build_tier_evt" +build_tier_hit = "legenddataflow.scripts.tier.hit:build_tier_hit" +build_tier_raw_blind = "legenddataflow.scripts.tier.raw_blind:build_tier_raw_blind" +build_tier_raw_fcio = "legenddataflow.scripts.tier.raw_fcio:build_tier_raw_fcio" +build_tier_raw_orca = "legenddataflow.scripts.tier.raw_orca:build_tier_raw_orca" +build_tier_skm = 
"legenddataflow.scripts.tier.skm:build_tier_skm" +build_tier_tcm = "legenddataflow.scripts.tier.tcm:build_tier_tcm" +par_geds_dsp_dplms = "legenddataflow.scripts.par.geds.dsp.dplms:par_geds_dsp_dplms" +par_geds_dsp_eopt = "legenddataflow.scripts.par.geds.dsp.eopt:par_geds_dsp_eopt" +par_geds_dsp_evtsel = "legenddataflow.scripts.par.geds.dsp.evtsel:par_geds_dsp_evtsel" +par_geds_dsp_nopt = "legenddataflow.scripts.par.geds.dsp.nopt:par_geds_dsp_nopt" +par_geds_dsp_svm_build = "legenddataflow.scripts.par.geds.dsp.svm_build:par_geds_dsp_svm_build" +par_geds_dsp_svm = "legenddataflow.scripts.par.geds.dsp.svm:par_geds_dsp_svm" +par_geds_dsp_tau = "legenddataflow.scripts.par.geds.dsp.tau:par_geds_dsp_tau" +par_geds_hit_aoe = "legenddataflow.scripts.par.geds.hit.aoe:par_geds_hit_aoe" +par_geds_hit_ecal = "legenddataflow.scripts.par.geds.hit.ecal:par_geds_hit_ecal" +par_geds_hit_lq = "legenddataflow.scripts.par.geds.hit.lq:par_geds_hit_lq" +par_geds_hit_qc = "legenddataflow.scripts.par.geds.hit.qc:par_geds_hit_qc" +par_geds_pht_aoe = "legenddataflow.scripts.par.geds.pht.aoe:par_geds_pht_aoe" +par_geds_pht_ecal_part = "legenddataflow.scripts.par.geds.pht.ecal_part:par_geds_pht_ecal_part" +par_geds_pht_fast = "legenddataflow.scripts.par.geds.pht.fast:par_geds_pht_fast" +par_geds_pht_qc_phy = "legenddataflow.scripts.par.geds.pht.qc_phy:par_geds_pht_qc_phy" +par_geds_pht_qc = "legenddataflow.scripts.par.geds.pht.qc:par_geds_pht_qc" +par_geds_psp_average = "legenddataflow.scripts.par.geds.psp.average:par_geds_psp_average" +par_geds_raw_blindcal = "legenddataflow.scripts.par.geds.raw.blindcal:par_geds_raw_blindcal" +par_geds_raw_blindcheck = "legenddataflow.scripts.par.geds.raw.blindcheck:par_geds_raw_blindcheck" +par_geds_tcm_pulser = "legenddataflow.scripts.par.geds.raw.tcm.pulser:par_geds_raw_pulser" [tool.uv.workspace] exclude = ["generated", "inputs", "software", "workflow"] [tool.uv] -dev-dependencies = [ - "legend-dataflow[test]", -] +default-groups = [] [tool.pytest.ini_options] minversion = "6.0" From 68ac1804b2d75043403ce770cce0b077e643610a Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 17:53:09 +0100 Subject: [PATCH 084/101] more bugfixes --- workflow/src/legenddataflow/FileKey.py | 10 ++++++---- workflow/src/legenddataflow/cal_grouping.py | 2 +- workflow/src/legenddataflow/create_pars_keylist.py | 14 +++++--------- workflow/src/legenddataflow/execenv.py | 13 ++++++------- 4 files changed, 18 insertions(+), 21 deletions(-) diff --git a/workflow/src/legenddataflow/FileKey.py b/workflow/src/legenddataflow/FileKey.py index 1e33b96..7870e46 100644 --- a/workflow/src/legenddataflow/FileKey.py +++ b/workflow/src/legenddataflow/FileKey.py @@ -73,7 +73,7 @@ def get_filekey_from_filename(cls, filename): def get_filekey_from_pattern(cls, filename, pattern=None): if isinstance(pattern, Path): pattern = pattern.as_posix() - + filename = str(filename) key_pattern_rx = re.compile( regex_from_filepattern(cls.key_pattern if pattern is None else pattern) ) @@ -108,9 +108,11 @@ def parse_keypart(cls, keypart): return cls(**d) def expand(self, file_pattern, **kwargs): - if isinstance(file_pattern, Path): - file_pattern = file_pattern.as_posix() - wildcard_dict = dict(**self._asdict(), **kwargs) + file_pattern = str(file_pattern) + wildcard_dict = self._asdict() + if kwargs is not None: + for key, value in kwargs.items(): + wildcard_dict[key] = value wildcard_dict = { wildcard: [wildcard_value] if isinstance(wildcard_value, str) diff --git a/workflow/src/legenddataflow/cal_grouping.py 
b/workflow/src/legenddataflow/cal_grouping.py index 13145d7..646791a 100644 --- a/workflow/src/legenddataflow/cal_grouping.py +++ b/workflow/src/legenddataflow/cal_grouping.py @@ -194,7 +194,7 @@ def get_log_file( get_pattern_log_channel(self.setup, name, processing_timestamp) )[0] else: - return "/tmp/log.log" + return "log.log" def get_timestamp( self, catalog, dataset, channel, tier, experiment="l200", datatype="cal" diff --git a/workflow/src/legenddataflow/create_pars_keylist.py b/workflow/src/legenddataflow/create_pars_keylist.py index 9325a6d..5f51828 100644 --- a/workflow/src/legenddataflow/create_pars_keylist.py +++ b/workflow/src/legenddataflow/create_pars_keylist.py @@ -7,10 +7,9 @@ import warnings from pathlib import Path -import snakemake as smk import yaml -from .FileKey import FileKey, ProcessingFileKey +from .FileKey import FileKey, ProcessingFileKey, regex_from_filepattern from .patterns import par_validity_pattern @@ -107,13 +106,10 @@ def get_keys(keypart, search_pattern): wildcard_dict = dict(ext="*", **d._asdict()) else: wildcard_dict = d._asdict() - try: - tier_pattern_rx = re.compile( - smk.io.regex_from_filepattern(str(search_pattern)) - ) - except AttributeError: - tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) - fn_glob_pattern = smk.io.expand(search_pattern, **wildcard_dict)[0] + + tier_pattern_rx = re.compile(regex_from_filepattern(str(search_pattern))) + key = FileKey.get_filekey_from_pattern(search_pattern, search_pattern) + fn_glob_pattern = key.get_path_from_filekey(search_pattern, **wildcard_dict)[0] p = Path(fn_glob_pattern) parts = p.parts[p.is_absolute() :] files = Path(p.root).glob(str(Path(*parts))) diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index 6a0239d..9fd2ac0 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -60,19 +60,19 @@ def execenv_python(config, aslist=False): return " ".join(cmdline), cmdenv -def execenv_smk_py_script(workflow, config, scriptname, aslist=False): +def execenv_smk_py_script(config, scriptname, aslist=False): """Returns the command used to run a Python script for a Snakemake rule. 
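The create_pars_keylist change above drops the snakemake.io dependency in favour of the regex_from_filepattern helper imported from FileKey. That helper is not shown in this hunk; the standalone sketch below only illustrates the idea of turning a {wildcard} file pattern into a regex with named groups and is not the project's implementation:

import re

def pattern_to_regex(pattern):
    # escape the literal parts, turn every {name} into a named capture group
    out, pos = [], 0
    for m in re.finditer(r"\{(\w+)\}", pattern):
        out.append(re.escape(pattern[pos:m.start()]))
        out.append(f"(?P<{m.group(1)}>[^/]+)")
        pos = m.end()
    out.append(re.escape(pattern[pos:]))
    return "".join(out) + "$"

m = re.match(
    pattern_to_regex("l200-{period}-{run}-cal-{timestamp}-par_dsp.yaml"),
    "l200-p03-r001-cal-20230101T123456Z-par_dsp.yaml",
)
# m.groupdict() == {"period": "p03", "run": "r001", "timestamp": "20230101T123456Z"}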
For example: `apptainer run image.sif python path/to/script.py` """ config = AttrsDict(config) - cmdline, cmdenv = execenv_python(config, aslist=True) - cmdline.append(f"{workflow.basedir}/scripts/{scriptname}") + cmdline, _ = execenv_prefix(config, aslist=True) + cmdline.append(f"{config.paths.install}/bin/{scriptname} ") if aslist: - return cmdline, cmdenv - return " ".join(cmdline), cmdenv + return cmdline + return " ".join(cmdline) def dataprod() -> None: @@ -240,9 +240,8 @@ def _runcmd(cmd_expr, cmd_env, **kwargs): "pip", "--no-cache", "install", - str(config_loc), + str(config_loc), # +"[dataprod]" ] - if args.editable: cmd_expr.insert(-1, "--editable") From e3fe518d0e64ec40b46bfbbc7a34feabbc746990 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 17:53:32 +0100 Subject: [PATCH 085/101] config to attrsdict --- workflow/Snakefile | 5 +++-- workflow/Snakefile-build-raw | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 50be710..7bc5c65 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -18,24 +18,25 @@ from datetime import datetime from collections import OrderedDict import logging +from dbetto import AttrsDict from legendmeta import LegendMetadata from legenddataflow import CalGrouping from legenddataflow import utils utils.subst_vars_in_snakemake_config(workflow, config) +config = AttrsDict(config) check_in_cycle = True configs = utils.config_path(config) chan_maps = utils.chan_map_path(config) meta = utils.metadata_path(config) det_status = utils.det_status_path(config) -swenv = utils.runcmd(config) basedir = workflow.basedir time = datetime.now().strftime("%Y%m%dT%H%M%SZ") if not Path(meta).exists(): - LegendMetadata().checkout(config["legend_metadata_version"]) + LegendMetadata(meta).checkout(config.legend_metadata_version) part = CalGrouping(config, Path(det_status) / "cal_groupings.yaml") diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw index da0d58d..6346978 100644 --- a/workflow/Snakefile-build-raw +++ b/workflow/Snakefile-build-raw @@ -11,8 +11,10 @@ from pathlib import Path from legenddataflow import patterns as patt from legenddataflow import utils, execenv, ParsKeyResolve from datetime import datetime +from dbetto import AttrsDict utils.subst_vars_in_snakemake_config(workflow, config) +config = AttrsDict(config) check_in_cycle = True swenv = execenv.execenv_prefix(config) @@ -22,7 +24,7 @@ det_status = utils.det_status_path(config) time = datetime.now().strftime("%Y%m%dT%H%M%SZ") if not Path(meta_path).exists(): - LegendMetadata(meta_path).checkout(config["legend_metadata_version"]) + LegendMetadata(meta_path).checkout(config.legend_metadata_version) wildcard_constraints: From d2a881aacef9b24e0c85070901a62d3909352e9d Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 17:54:01 +0100 Subject: [PATCH 086/101] fix tests maybe --- tests/dummy_cycle/config.json | 40 ------------- tests/dummy_cycle/config.yaml | 58 +++++++++++++++++++ .../generated/par/dsp/validity.jsonl | 3 - .../dataprod/overrides/dsp/validity.jsonl | 1 - tests/test_util.py | 8 +-- 5 files changed, 62 insertions(+), 48 deletions(-) delete mode 100644 tests/dummy_cycle/config.json create mode 100644 tests/dummy_cycle/config.yaml delete mode 100644 tests/dummy_cycle/generated/par/dsp/validity.jsonl delete mode 100644 tests/dummy_cycle/inputs/dataprod/overrides/dsp/validity.jsonl diff --git a/tests/dummy_cycle/config.json b/tests/dummy_cycle/config.json deleted file mode 100644 index 
e9a358d..0000000 --- a/tests/dummy_cycle/config.json +++ /dev/null @@ -1,40 +0,0 @@ -{ - "setups": { - "test": { - "paths": { - "sandbox_path": "", - "tier_daq": "$_/input_data/tier/daq", - - "dataflow": "$_/dataflow", - - "metadata": "$_/inputs", - "config": "$_/inputs/dataprod/config", - "par_overwrite": "$_/inputs/dataprod/overrides", - "chan_map": "$_/inputs/hardware/configuration", - "detector_db": "$_/inputs/hardware/detectors", - - "tier": "$_/generated/tier", - "tier_raw": "$_/generated/tier/raw", - "tier_tcm": "$_/generated/tier/tcm", - "tier_dsp": "$_/generated/tier/dsp", - "tier_hit": "$_/generated/tier/hit", - "tier_evt": "$_/generated/tier/evt", - - "par": "$_/generated/par", - "par_raw": "$_/generated/par/raw", - "par_tcm": "$_/generated/par/tcm", - "par_dsp": "$_/generated/par/dsp", - "par_hit": "$_/generated/par/hit", - "par_evt": "$_/generated/par/evt", - - "plt": "$_/generated/plt", - "log": "$_/generated/log", - - "tmp_plt": "$_/generated/tmp/plt", - "tmp_log": "$_/generated/tmp/log", - "tmp_filelists": "$_/generated/tmp/filelists", - "tmp_par": "$_/generated/tmp/par" - } - } - } -} diff --git a/tests/dummy_cycle/config.yaml b/tests/dummy_cycle/config.yaml new file mode 100644 index 0000000..a40938d --- /dev/null +++ b/tests/dummy_cycle/config.yaml @@ -0,0 +1,58 @@ +paths: + sandbox_path: "" + tier_daq: $_/generated/tier/daq + tier_raw_blind: "" + + workflow: $_/workflow + + metadata: $_/inputs + config: $_/inputs/dataprod/config + par_overwrite: $_/inputs/dataprod/overrides + chan_map: $_/inputs/hardware/configuration + detector_status: $_/inputs/datasets + detector_db: $_/inputs/hardware/detectors + + tier: $_/generated/tier + tier_raw: /data2/public/prodenv/prod-blind/ref-raw/generated/tier/raw + tier_tcm: $_/generated/tier/tcm + tier_dsp: $_/generated/tier/dsp + tier_hit: $_/generated/tier/hit + tier_ann: $_/generated/tier/ann + tier_evt: $_/generated/tier/evt + tier_psp: $_/generated/tier/psp + tier_pht: $_/generated/tier/pht + tier_pan: $_/generated/tier/pan + tier_pet: $_/generated/tier/pet + tier_skm: $_/generated/tier/skm + + par: $_/generated/par + par_raw: $_/generated/par/raw + par_tcm: $_/generated/par/tcm + par_dsp: $_/generated/par/dsp + par_hit: $_/generated/par/hit + par_evt: $_/generated/par/evt + par_psp: $_/generated/par/psp + par_pht: $_/generated/par/pht + par_pet: $_/generated/par/pet + + plt: $_/generated/plt + log: $_/generated/log + + tmp_plt: $_/generated/tmp/plt + tmp_log: $_/generated/tmp/log + tmp_filelists: $_/generated/tmp/filelists + tmp_par: $_/generated/tmp/par + + src: $_/software/python/src + install: $_/.snakemake/legend-dataflow/venv + +table_format: + raw: ch{ch:07d}/raw + dsp: ch{ch:07d}/dsp + psp: ch{ch:07d}/dsp + hit: ch{ch:07d}/hit + pht: ch{ch:07d}/hit + evt: "{grp}/evt" + pet: "{grp}/evt" + skm: "{grp}/skm" + tcm: hardware_tcm_1 diff --git a/tests/dummy_cycle/generated/par/dsp/validity.jsonl b/tests/dummy_cycle/generated/par/dsp/validity.jsonl deleted file mode 100644 index c730b86..0000000 --- a/tests/dummy_cycle/generated/par/dsp/validity.jsonl +++ /dev/null @@ -1,3 +0,0 @@ -{"valid_from": "20230101T123456Z", "category": "all", "apply": ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"]} -{"valid_from": "20230110T123456Z", "category": "all", "apply": ["lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.json", "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"]} -{"valid_from": "20230202T004321Z", "category": "all", "apply": 
["cal/p00/r001/l200-p00-r001-cal-20230202T004321Z-par_dsp.json","lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.json"]} diff --git a/tests/dummy_cycle/inputs/dataprod/overrides/dsp/validity.jsonl b/tests/dummy_cycle/inputs/dataprod/overrides/dsp/validity.jsonl deleted file mode 100644 index 4a13449..0000000 --- a/tests/dummy_cycle/inputs/dataprod/overrides/dsp/validity.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"valid_from": "20230101T123456Z", "category": "all", "apply": ["cal/p00/r000/l200-p00-r000-cal-T%-par_dsp_energy-overwrite.json"]} diff --git a/tests/test_util.py b/tests/test_util.py index 38d8910..01f5ffb 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -11,7 +11,7 @@ testprod = Path(__file__).parent / "dummy_cycle" -with (testprod / "config.json").open() as r: +with (testprod / "config.yaml").open() as r: setup = json.load(r) subst_vars(setup, var_values={"_": str(testprod)}) setup = setup["setups"]["test"] @@ -42,7 +42,7 @@ def test_filekey(): assert ( FileKey.get_filekey_from_pattern( key.get_path_from_filekey(patterns.get_pattern_tier(setup, "dsp"))[0], - utils.get_pattern_tier(setup, "dsp"), + utils.get_tier_path(setup, "dsp"), ).name == key.name ) @@ -98,6 +98,6 @@ def test_create_pars_keylist(): pkeylist, {"cal": ["par_dsp"], "lar": ["par_dsp"]} )[1].apply ) == { - "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", - "lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.json", + "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.yaml", + "lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.yaml", } From 14e523ed10db08ff3f93981ec0d64f1e7f37cc88 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 18:06:22 +0100 Subject: [PATCH 087/101] use channel merge func --- workflow/rules/channel_merge.smk | 37 +++++---- workflow/rules/dsp.smk | 134 +------------------------------ workflow/rules/dsp_pars_geds.smk | 4 +- workflow/rules/hit.smk | 83 +------------------ workflow/rules/pht.smk | 74 +---------------- workflow/rules/psp.smk | 119 +-------------------------- 6 files changed, 29 insertions(+), 422 deletions(-) diff --git a/workflow/rules/channel_merge.smk b/workflow/rules/channel_merge.smk index 21ebf25..ef2b57e 100644 --- a/workflow/rules/channel_merge.smk +++ b/workflow/rules/channel_merge.smk @@ -1,4 +1,4 @@ -from scripts.util.patterns import ( +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_plts, @@ -6,7 +6,7 @@ from scripts.util.patterns import ( get_pattern_pars_tmp, get_pattern_pars, ) -from scripts.util.utils import set_last_rule_name +from legenddataflow.utils import set_last_rule_name import inspect from legenddataflow.execenv import execenv_smk_py_script @@ -14,7 +14,7 @@ def build_merge_rules(tier,lh5_merge=False): rule: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", tier, basedir, @@ -25,7 +25,7 @@ def build_merge_rules(tier,lh5_merge=False): timestamp="{timestamp}", datatype="cal", output: - get_pattern_plts(setup, tier), + get_pattern_plts(config, tier), group: f"merge-{tier}" shell: @@ -39,7 +39,7 @@ def build_merge_rules(tier,lh5_merge=False): rule: input: lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", tier, basedir, @@ -48,9 +48,12 @@ def build_merge_rules(tier,lh5_merge=False): name="objects", extension="pkl", ), + params: + 
timestamp="{timestamp}", + datatype="cal", output: get_pattern_pars( - setup, + config, tier, name="objects", extension="dir", @@ -71,7 +74,7 @@ def build_merge_rules(tier,lh5_merge=False): rule: input: lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", tier, basedir, @@ -84,7 +87,7 @@ def build_merge_rules(tier,lh5_merge=False): output: temp( get_pattern_pars_tmp( - setup, + config, tier, datatype="cal", ) @@ -92,7 +95,7 @@ def build_merge_rules(tier,lh5_merge=False): group: f"merge-{tier}" shell: - execenv_smk_py_script(config, "merge_channels") + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -103,7 +106,7 @@ def build_merge_rules(tier,lh5_merge=False): rule: input: in_files=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", tier, basedir, @@ -112,13 +115,13 @@ def build_merge_rules(tier,lh5_merge=False): extension="lh5" if lh5_merge is True else inspect.signature(get_par_chanlist).parameters['extension'].default, ), in_db=get_pattern_pars_tmp( - setup, + config, "dsp", datatype="cal", - ) if lh5_merge is True else None, - plts=get_pattern_plts(setup, "dsp"), + ) if lh5_merge is True else [], + plts=get_pattern_plts(config, "dsp"), objects=get_pattern_pars( - setup, + config, "dsp", name="objects", extension="dir", @@ -129,17 +132,17 @@ def build_merge_rules(tier,lh5_merge=False): datatype="cal", output: out_file=get_pattern_pars( - setup, + config, tier, extension="lh5" if lh5_merge is True else inspect.signature(get_pattern_pars).parameters['extension'].default, check_in_cycle=check_in_cycle, ), - out_db=get_pattern_pars(setup, tier, check_in_cycle=check_in_cycle) if lh5_merge is True else None, + out_db=get_pattern_pars(config, tier, check_in_cycle=check_in_cycle) if lh5_merge is True else [], group: f"merge-{tier}" run: shell_string = ( - execenv_smk_py_script(config, "merge_channels") + f'{execenv_smk_py_script(config, "merge_channels")}' "--output {output.out_file} " "--input {input.in_files} " "--timestamp {params.timestamp} " diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index a386c86..4683a7c 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -30,140 +30,10 @@ Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) -rule build_plts_dsp: - input: - lambda wildcards: get_plt_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - ), - params: - timestamp="{timestamp}", - datatype="cal", - output: - get_pattern_plts(config, "dsp"), - group: - "merge-dsp" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input} " - "--output {output} " - "--channelmap {meta} " +include: "channel_merge.smk" -rule build_pars_dsp_objects: - input: - lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - name="objects", - extension="pkl", - ), - params: - timestamp="{timestamp}", - datatype="cal", - output: - get_pattern_pars( - config, - "dsp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - group: - 
"merge-dsp" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input} " - "--output {output} " - "--timestamp {params.timestamp} " - "--channelmap {meta} " - - -rule build_pars_dsp_db: - input: - lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - ), - params: - timestamp="{timestamp}", - datatype="cal", - output: - temp( - get_pattern_pars_tmp( - config, - "dsp", - datatype="cal", - ) - ), - group: - "merge-dsp" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input} " - "--output {output} " - "--timestamp {params.timestamp} " - "--channelmap {meta} " - - -rule build_pars_dsp: - input: - in_files=lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - name="dplms", - extension="lh5", - ), - in_db=get_pattern_pars_tmp( - config, - "dsp", - datatype="cal", - ), - plts=get_pattern_plts(config, "dsp"), - objects=get_pattern_pars( - config, - "dsp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - params: - timestamp="{timestamp}", - datatype="cal", - output: - out_file=get_pattern_pars( - config, - "dsp", - extension="lh5", - check_in_cycle=check_in_cycle, - ), - out_db=get_pattern_pars(config, "dsp", check_in_cycle=check_in_cycle), - group: - "merge-dsp" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--output {output.out_file} " - "--in_db {input.in_db} " - "--out_db {output.out_db} " - "--input {input.in_files} " - "--timestamp {params.timestamp} " - "--channelmap {meta} " +build_merge_rules("dsp", lh5_merge=True) rule build_dsp: diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index 52f5cf6..98a5a55 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -141,9 +141,7 @@ rule build_pars_dsp_dplms_geds: channel="{channel}", output: dsp_pars=temp(get_pattern_pars_tmp_channel(config, "dsp", "dplms")), - lh5_path=temp( - get_pattern_pars_tmp_channel(config, "dsp", "dplms", extension="lh5") - ), + lh5_path=temp(get_pattern_pars_tmp_channel(config, "dsp", extension="lh5")), plots=temp(get_pattern_plts_tmp_channel(config, "dsp", "dplms")), log: get_pattern_log_channel(config, "pars_dsp_dplms", time), diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index aaa1cf5..0a2c6f6 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -219,89 +219,10 @@ rule build_lq_calibration: "{input.files}" -rule build_pars_hit_objects: - input: - lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "hit", - basedir, - det_status, - chan_maps, - name="objects", - extension="pkl", - ), - output: - get_pattern_pars( - config, - "hit", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - params: - ro_input=lambda _, input: ro(input), - group: - "merge-hit" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {params.ro_input} " - "--output {output} " - "--channelmap {meta} " +include: "channel_merge.smk" -rule build_plts_hit: - input: - lambda wildcards: get_plt_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "hit", - basedir, - 
det_status, - chan_maps, - ), - output: - get_pattern_plts(config, "hit"), - params: - ro_input=lambda _, input: ro(input), - group: - "merge-hit" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {params.ro_input} " - "--output {output} " - "--channelmap {meta} " - - -rule build_pars_hit: - input: - infiles=lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "hit", - basedir, - det_status, - chan_maps, - ), - plts=get_pattern_plts(config, "hit"), - objects=get_pattern_pars( - config, - "hit", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - params: - ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, - output: - get_pattern_pars(config, "hit", check_in_cycle=check_in_cycle), - group: - "merge-hit" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {params.ro_input[infiles]} " - "--output {output} " - "--channelmap {meta} " +build_merge_rules("hit", lh5_merge=False) rule build_hit: diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index 1b792c6..bab3de7 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -795,80 +795,10 @@ rule_order_list.append(fallback_pht_rule.name) workflow._ruleorder.add(*rule_order_list) # [::-1] -rule build_pars_pht_objects: - input: - lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "pht", - basedir, - det_status, - chan_maps, - name="objects", - extension="pkl", - ), - output: - get_pattern_pars( - config, - "pht", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - group: - "merge-hit" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input} " - "--output {output} " +include: "channel_merge.smk" -rule build_plts_pht: - input: - lambda wildcards: get_plt_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "pht", - basedir, - det_status, - chan_maps, - ), - output: - get_pattern_plts(config, "pht"), - group: - "merge-hit" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input} " - "--output {output} " - - -rule build_pars_pht: - input: - infiles=lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "pht", - basedir, - det_status, - chan_maps, - ), - plts=get_pattern_plts(config, "pht"), - objects=get_pattern_pars( - config, - "pht", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - output: - get_pattern_pars(config, "pht", check_in_cycle=check_in_cycle), - group: - "merge-hit" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input.infiles} " - "--output {output} " +build_merge_rules("pht", lh5_merge=False) rule build_pht: diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index 990f186..ab2e70f 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -29,125 +29,10 @@ Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) -rule build_pars_psp_objects: - input: - lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "psp", - basedir, - det_status, - chan_maps, - 
name="objects", - extension="pkl", - ), - output: - get_pattern_pars( - config, - "psp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - group: - "merge-psp" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input} " - "--output {output} " - "--channelmap {meta} " - - -rule build_plts_psp: - input: - lambda wildcards: get_plt_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "psp", - basedir, - det_status, - chan_maps, - ), - output: - get_pattern_plts(config, "psp"), - group: - "merge-psp" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input} " - "--output {output} " - "--channelmap {meta} " - - -rule build_pars_psp_db: - input: - lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "psp", - basedir, - det_status, - chan_maps, - ), - output: - temp( - get_pattern_pars_tmp( - config, - "psp", - datatype="cal", - ) - ), - group: - "merge-psp" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input} " - "--output {output} " - "--channelmap {meta} " +include: "channel_merge.smk" -rule build_pars_psp: - input: - in_files=lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - name="dplms", - extension="lh5", - ), - in_db=get_pattern_pars_tmp( - config, - "psp", - datatype="cal", - ), - plts=get_pattern_plts(config, "psp"), - objects=get_pattern_pars( - config, - "psp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - output: - out_file=get_pattern_pars( - config, - "psp", - extension="lh5", - check_in_cycle=check_in_cycle, - ), - out_db=get_pattern_pars(config, "psp", check_in_cycle=check_in_cycle), - group: - "merge-psp" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--output {output.out_file} " - "--in_db {input.in_db} " - "--out_db {output.out_db} " - "--input {input.in_files} " - "--channelmap {meta} " +build_merge_rules("psp", lh5_merge=True) rule build_psp: From f323190f8322f18457f6301494cf639d6d2f4c9c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 18:10:49 +0100 Subject: [PATCH 088/101] json to yaml tests --- tests/test_util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_util.py b/tests/test_util.py index 01f5ffb..4041614 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,6 +1,6 @@ -import json from pathlib import Path +import yaml from legenddataflow import ( FileKey, ParsKeyResolve, @@ -12,7 +12,7 @@ testprod = Path(__file__).parent / "dummy_cycle" with (testprod / "config.yaml").open() as r: - setup = json.load(r) + setup = yaml.safe_load(r) subst_vars(setup, var_values={"_": str(testprod)}) setup = setup["setups"]["test"] From 5002261710c48c96ec2346f86bbb273239f0de4d Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 20:19:17 +0100 Subject: [PATCH 089/101] fix wildcard constraint --- workflow/src/legenddataflow/cal_grouping.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/workflow/src/legenddataflow/cal_grouping.py b/workflow/src/legenddataflow/cal_grouping.py index 646791a..b2ce781 100644 --- a/workflow/src/legenddataflow/cal_grouping.py +++ b/workflow/src/legenddataflow/cal_grouping.py @@ -85,7 +85,7 @@ def 
get_par_files( ): all_par_files.append(par_file) if channel == "default": - channel = "{detector}" + channel = "{channel}" selected_par_files = [] for par_file in all_par_files: fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) @@ -138,7 +138,7 @@ def get_plt_files( ): all_par_files.append(par_file) if channel == "default": - channel = "{detector}" + channel = "{channel}" selected_par_files = [] for par_file in all_par_files: fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) @@ -187,7 +187,7 @@ def get_log_file( if len(par_files) > 0: fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) if channel == "default": - fk.channel = "{detector}" + fk.channel = "{channel}" else: fk.channel = channel return fk.get_path_from_filekey( @@ -208,7 +208,6 @@ def get_timestamp( datatype=datatype, name=None, ) - if len(par_files) > 0: fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) return fk.timestamp @@ -231,6 +230,6 @@ def get_wildcard_constraints(self, dataset, channel): out_string = "" for channel in exclude_chans: out_string += f"(?!{channel})" - return out_string + r"^[VPCB]\d{1}\w{5}$" + return out_string + r"[PCVB]{1}\d{1}\w{5}" else: - return r"^[VPCB]\d{1}\w{5}$" + return r"[PCVB]{1}\d{1}\w{5}" From 479acac5c7b0159b838e2b1e4f40772b5f5b27e2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 20:19:41 +0100 Subject: [PATCH 090/101] split out par rules --- workflow/Snakefile | 4 +- workflow/rules/channel_merge.smk | 12 +- workflow/rules/dsp.smk | 7 - workflow/rules/dsp_pars_geds.smk | 6 - workflow/rules/hit.smk | 200 +---- workflow/rules/hit_pars_geds.smk | 205 +++++ workflow/rules/pht.smk | 771 +----------------- workflow/rules/pht_pars_geds.smk | 768 +++++++++++++++++ .../{pht_fast.smk => pht_pars_geds_fast.smk} | 0 workflow/rules/psp.smk | 8 +- workflow/rules/psp_pars_geds.smk | 5 +- 11 files changed, 990 insertions(+), 996 deletions(-) create mode 100644 workflow/rules/hit_pars_geds.smk create mode 100644 workflow/rules/pht_pars_geds.smk rename workflow/rules/{pht_fast.smk => pht_pars_geds_fast.smk} (100%) diff --git a/workflow/Snakefile b/workflow/Snakefile index 7bc5c65..db7e3c3 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -58,9 +58,11 @@ include: "rules/dsp_pars_geds.smk" include: "rules/dsp.smk" include: "rules/psp_pars_geds.smk" include: "rules/psp.smk" +include: "rules/hit_pars_geds.smk" include: "rules/hit.smk" +include: "rules/pht_pars_geds.smk" +include: "rules/pht_pars_geds_fast.smk" include: "rules/pht.smk" -include: "rules/pht_fast.smk" include: "rules/ann.smk" include: "rules/evt.smk" include: "rules/skm.smk" diff --git a/workflow/rules/channel_merge.smk b/workflow/rules/channel_merge.smk index ef2b57e..b970840 100644 --- a/workflow/rules/channel_merge.smk +++ b/workflow/rules/channel_merge.smk @@ -10,7 +10,9 @@ from legenddataflow.utils import set_last_rule_name import inspect from legenddataflow.execenv import execenv_smk_py_script -def build_merge_rules(tier,lh5_merge=False): +def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): + if lh5_tier is None: + lh5_tier = tier rule: input: lambda wildcards: get_plt_chanlist( @@ -108,7 +110,7 @@ def build_merge_rules(tier,lh5_merge=False): in_files=lambda wildcards: get_par_chanlist( config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - tier, + lh5_tier, basedir, det_status, chan_maps, @@ -116,13 +118,13 @@ def build_merge_rules(tier,lh5_merge=False): ), 
in_db=get_pattern_pars_tmp( config, - "dsp", + tier, datatype="cal", ) if lh5_merge is True else [], - plts=get_pattern_plts(config, "dsp"), + plts=get_pattern_plts(config, tier), objects=get_pattern_pars( config, - "dsp", + tier, name="objects", extension="dir", check_in_cycle=check_in_cycle, diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index 4683a7c..f296716 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -7,7 +7,6 @@ Snakemake rules for processing dsp tier. from legenddataflow.pars_loading import ParsCatalog from legenddataflow.create_pars_keylist import ParsKeyResolve from pathlib import Path -from legenddataflow.create_pars_keylist import ParsKeyResolve from legenddataflow.patterns import ( get_pattern_plts, get_pattern_tier, @@ -23,12 +22,6 @@ dsp_par_catalog = ParsKeyResolve.get_par_catalog( {"cal": ["par_dsp"], "lar": ["par_dsp"]}, ) -dsp_par_cat_file = Path(pars_path(config)) / "dsp" / "validity.yaml" -if dsp_par_cat_file.is_file(): - dsp_par_cat_file.unlink() -Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) -ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) - include: "channel_merge.smk" diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index 98a5a55..86b8342 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -15,12 +15,6 @@ from legenddataflow.patterns import ( ) from legenddataflow.execenv import execenv_smk_py_script -dsp_par_catalog = ParsKeyResolve.get_par_catalog( - ["-*-*-*-cal"], - get_pattern_tier(config, "raw", check_in_cycle=False), - {"cal": ["par_dsp"], "lar": ["par_dsp"]}, -) - rule build_pars_dsp_tau_geds: input: diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index 0a2c6f6..5d83174 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -6,19 +6,13 @@ Snakemake rules for processing hit tier. 
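The include: "channel_merge.smk" plus build_merge_rules(tier, lh5_merge=...) calls above replace four near-identical merge rules per tier. A plain-Python analogue of that factory pattern, assuming the anonymous rule: blocks are renamed with set_last_rule_name as the import suggests; names and outputs below are placeholders:

def set_last_rule_name(registry, name):
    # stand-in for legenddataflow.utils.set_last_rule_name: rename the rule added last
    registry[-1] = (name, registry[-1][1])

def build_merge_rules(registry, tier, lh5_merge=False):
    registry.append(("<anonymous>", f"plt/{tier}"))
    set_last_rule_name(registry, f"build_plts_{tier}")
    registry.append(("<anonymous>", f"par/{tier}_objects"))
    set_last_rule_name(registry, f"build_pars_{tier}_objects")
    if lh5_merge:  # dsp/psp additionally merge per-channel lh5 pars and write a db file
        registry.append(("<anonymous>", f"par/{tier}.lh5"))
        set_last_rule_name(registry, f"build_pars_{tier}")

rules = []
for tier, lh5 in [("dsp", True), ("hit", False), ("pht", False), ("psp", True)]:
    build_merge_rules(rules, tier, lh5_merge=lh5)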
This is done in 4 steps: - running build hit over all channels using par file """ -from legenddataflow.pars_loading import ParsCatalog from legenddataflow.create_pars_keylist import ParsKeyResolve +from legenddataflow.pars_loading import ParsCatalog from pathlib import Path from legenddataflow.patterns import ( - get_pattern_pars_tmp_channel, - get_pattern_plts_tmp_channel, - get_pattern_log_channel, - get_pattern_pars, - get_pattern_plts, get_pattern_tier, - get_pattern_pars_tmp, get_pattern_log, - get_pattern_pars, + get_pattern_pars_tmp, ) from legenddataflow.execenv import execenv_smk_py_script @@ -28,196 +22,6 @@ hit_par_catalog = ParsKeyResolve.get_par_catalog( {"cal": ["par_hit"], "lar": ["par_hit"]}, ) -hit_par_cat_file = Path(pars_path(config)) / "hit" / "validity.yaml" -if hit_par_cat_file.is_file(): - hit_par_cat_file.unlink() -Path(hit_par_cat_file).parent.mkdir(parents=True, exist_ok=True) -ParsKeyResolve.write_to_yaml(hit_par_catalog, hit_par_cat_file) - - -# This rule builds the qc using the calibration dsp files and fft files -rule build_qc: - input: - files=os.path.join( - filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - fft_files=os.path.join( - filelist_path(config), "all-{experiment}-{period}-{run}-fft-dsp.filelist" - ), - pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - overwrite_files=lambda wildcards: get_overwrite_file("hit", wildcards), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - qc_file=temp(get_pattern_pars_tmp_channel(config, "hit", "qc")), - plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "qc")), - log: - get_pattern_log_channel(config, "pars_hit_qc", time), - group: - "par-hit" - resources: - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_hit_qc")}' - "--log {log} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--configs {configs} " - "--metadata {meta} " - "--plot_path {output.plot_file} " - "--save_path {output.qc_file} " - "--pulser_file {input.pulser} " - "--cal_files {input.files} " - "--fft_files {input.fft_files} " - "--overwrite_files {input.overwrite_files} " - - -# This rule builds the energy calibration using the calibration dsp files -rule build_energy_calibration: - input: - files=os.path.join( - filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - ctc_dict=ancient( - lambda wildcards: ParsCatalog.get_par_file( - config, wildcards.timestamp, "dsp" - ) - ), - inplots=get_pattern_plts_tmp_channel(config, "hit", "qc"), - in_hit_dict=get_pattern_pars_tmp_channel(config, "hit", "qc"), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - ecal_file=temp(get_pattern_pars_tmp_channel(config, "hit", "energy_cal")), - results_file=temp( - get_pattern_pars_tmp_channel( - config, "hit", "energy_cal_objects", extension="pkl" - ) - ), - plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "energy_cal")), - log: - get_pattern_log_channel(config, "pars_hit_energy_cal", time), - group: - "par-hit" - resources: - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' - "--log {log} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--configs {configs} " - "--metadata {meta} " - "--plot_path {output.plot_file} " - "--results_path {output.results_file} " - "--save_path 
{output.ecal_file} " - "--inplot_dict {input.inplots} " - "--in_hit_dict {input.in_hit_dict} " - "--ctc_dict {input.ctc_dict} " - "--pulser_file {input.pulser} " - "--files {input.files}" - - -# This rule builds the a/e calibration using the calibration dsp files -rule build_aoe_calibration: - input: - files=os.path.join( - filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(config, "hit", "energy_cal"), - eres_file=get_pattern_pars_tmp_channel( - config, "hit", "energy_cal_objects", extension="pkl" - ), - inplots=get_pattern_plts_tmp_channel(config, "hit", "energy_cal"), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(config, "hit", "aoe_cal")), - aoe_results=temp( - get_pattern_pars_tmp_channel( - config, "hit", "aoe_cal_objects", extension="pkl" - ) - ), - plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "aoe_cal")), - log: - get_pattern_log_channel(config, "pars_hit_aoe_cal", time), - group: - "par-hit" - resources: - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_hit_aoe")}' - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--aoe_results {output.aoe_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--pulser_file {input.pulser} " - "--ecal_file {input.ecal_file} " - "{input.files}" - - -# This rule builds the lq calibration using the calibration dsp files -rule build_lq_calibration: - input: - files=os.path.join( - filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(config, "hit", "aoe_cal"), - eres_file=get_pattern_pars_tmp_channel( - config, "hit", "aoe_cal_objects", extension="pkl" - ), - inplots=get_pattern_plts_tmp_channel(config, "hit", "aoe_cal"), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(config, "hit")), - lq_results=temp( - get_pattern_pars_tmp_channel(config, "hit", "objects", extension="pkl") - ), - plot_file=temp(get_pattern_plts_tmp_channel(config, "hit")), - log: - get_pattern_log_channel(config, "pars_hit_lq_cal", time), - group: - "par-hit" - resources: - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_hit_lq")}' - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--lq_results {output.lq_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--pulser_file {input.pulser} " - "--ecal_file {input.ecal_file} " - "{input.files}" - include: "channel_merge.smk" diff --git a/workflow/rules/hit_pars_geds.smk b/workflow/rules/hit_pars_geds.smk new file mode 100644 index 0000000..8143f82 --- /dev/null +++ b/workflow/rules/hit_pars_geds.smk @@ -0,0 +1,205 @@ +""" +Snakemake rules for processing hit tier. 
This is done in 4 steps: +- extraction of calibration curves(s) for each channel from cal data +- extraction of psd calibration parameters for each channel from cal data +- combining of all channels into single pars files with associated plot and results files +- running build hit over all channels using par file +""" + +from pathlib import Path +from legenddataflow.patterns import ( + get_pattern_pars_tmp_channel, + get_pattern_plts_tmp_channel, + get_pattern_log_channel, + get_pattern_pars, + get_pattern_plts, + get_pattern_tier, + get_pattern_pars_tmp, + get_pattern_log, + get_pattern_pars, +) +from legenddataflow.execenv import execenv_smk_py_script + + +# This rule builds the qc using the calibration dsp files and fft files +rule build_qc: + input: + files=os.path.join( + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + ), + fft_files=os.path.join( + filelist_path(config), "all-{experiment}-{period}-{run}-fft-dsp.filelist" + ), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + overwrite_files=lambda wildcards: get_overwrite_file("hit", wildcards), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + qc_file=temp(get_pattern_pars_tmp_channel(config, "hit", "qc")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "qc")), + log: + get_pattern_log_channel(config, "pars_hit_qc", time), + group: + "par-hit" + resources: + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_hit_qc")}' + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--configs {configs} " + "--metadata {meta} " + "--plot_path {output.plot_file} " + "--save_path {output.qc_file} " + "--pulser_file {input.pulser} " + "--cal_files {input.files} " + "--fft_files {input.fft_files} " + "--overwrite_files {input.overwrite_files} " + + +# This rule builds the energy calibration using the calibration dsp files +rule build_energy_calibration: + input: + files=os.path.join( + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + ), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ctc_dict=ancient( + lambda wildcards: ParsCatalog.get_par_file( + config, wildcards.timestamp, "dsp" + ) + ), + inplots=get_pattern_plts_tmp_channel(config, "hit", "qc"), + in_hit_dict=get_pattern_pars_tmp_channel(config, "hit", "qc"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + ecal_file=temp(get_pattern_pars_tmp_channel(config, "hit", "energy_cal")), + results_file=temp( + get_pattern_pars_tmp_channel( + config, "hit", "energy_cal_objects", extension="pkl" + ) + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "energy_cal")), + log: + get_pattern_log_channel(config, "pars_hit_energy_cal", time), + group: + "par-hit" + resources: + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--configs {configs} " + "--metadata {meta} " + "--plot_path {output.plot_file} " + "--results_path {output.results_file} " + "--save_path {output.ecal_file} " + "--inplot_dict {input.inplots} " + "--in_hit_dict {input.in_hit_dict} " + "--ctc_dict {input.ctc_dict} " + "--pulser_file {input.pulser} " + "--files {input.files}" + + +# This rule builds the a/e calibration using the calibration dsp files +rule build_aoe_calibration: + input: + 
files=os.path.join( + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + ), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "hit", "energy_cal"), + eres_file=get_pattern_pars_tmp_channel( + config, "hit", "energy_cal_objects", extension="pkl" + ), + inplots=get_pattern_plts_tmp_channel(config, "hit", "energy_cal"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "hit", "aoe_cal")), + aoe_results=temp( + get_pattern_pars_tmp_channel( + config, "hit", "aoe_cal_objects", extension="pkl" + ) + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "aoe_cal")), + log: + get_pattern_log_channel(config, "pars_hit_aoe_cal", time), + group: + "par-hit" + resources: + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_hit_aoe")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--aoe_results {output.aoe_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--pulser_file {input.pulser} " + "--ecal_file {input.ecal_file} " + "{input.files}" + + +# This rule builds the lq calibration using the calibration dsp files +rule build_lq_calibration: + input: + files=os.path.join( + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + ), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "hit", "aoe_cal"), + eres_file=get_pattern_pars_tmp_channel( + config, "hit", "aoe_cal_objects", extension="pkl" + ), + inplots=get_pattern_plts_tmp_channel(config, "hit", "aoe_cal"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "hit")), + lq_results=temp( + get_pattern_pars_tmp_channel(config, "hit", "objects", extension="pkl") + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit")), + log: + get_pattern_log_channel(config, "pars_hit_lq_cal", time), + group: + "par-hit" + resources: + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_hit_lq")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--lq_results {output.lq_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--pulser_file {input.pulser} " + "--ecal_file {input.ecal_file} " + "{input.files}" diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index bab3de7..fa85971 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -6,19 +6,14 @@ Snakemake rules for processing pht (partition hit) tier data. 
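Each of these rules shells out to one of the console scripts registered in pyproject.toml earlier in the series, passing a common set of flags (--log, --configs, --metadata, --datatype, --timestamp, --channel) plus stage-specific inputs and outputs. A hedged argparse skeleton of what such an entry point is expected to accept, inferred only from the flags in the shell blocks; the real scripts under legenddataflow.scripts.par.geds.hit may differ in detail:

import argparse

def par_geds_hit_qc():
    p = argparse.ArgumentParser()
    for flag in ("--log", "--configs", "--metadata", "--datatype",
                 "--timestamp", "--channel", "--plot_path", "--save_path",
                 "--pulser_file", "--overwrite_files"):
        p.add_argument(flag)
    p.add_argument("--cal_files", nargs="*")
    p.add_argument("--fft_files", nargs="*")
    args = p.parse_args()
    # ... run the QC routines, then write args.save_path and args.plot_path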
This is done in 4 - running build hit over all channels using par file """ -from legenddataflow.pars_loading import ParsCatalog from legenddataflow.create_pars_keylist import ParsKeyResolve +from legenddataflow.pars_loading import ParsCatalog from pathlib import Path from legenddataflow.utils import filelist_path, set_last_rule_name from legenddataflow.patterns import ( - get_pattern_pars_tmp_channel, - get_pattern_plts_tmp_channel, - get_pattern_log_channel, - get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, get_pattern_log, - get_pattern_pars, ) from legenddataflow.execenv import execenv_smk_py_script @@ -28,773 +23,9 @@ pht_par_catalog = ParsKeyResolve.get_par_catalog( {"cal": ["par_pht"], "lar": ["par_pht"]}, ) -pht_par_cat_file = Path(pars_path(config)) / "pht" / "validity.yaml" -if pht_par_cat_file.is_file(): - pht_par_cat_file.unlink() -Path(pht_par_cat_file).parent.mkdir(parents=True, exist_ok=True) -ParsKeyResolve.write_to_yaml(pht_par_catalog, pht_par_cat_file) - intier = "psp" -rule pht_checkpoint: - input: - files=os.path.join( - filelist_path(config), - "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", - ), - output: - temp(get_pattern_pars_tmp_channel(config, "pht", "check")), - shell: - "touch {output}" - - -qc_pht_rules = {} -for key, dataset in part.datasets.items(): - for partition in dataset.keys(): - - rule: - input: - cal_files=part.get_filelists(partition, key, intier), - fft_files=part.get_filelists(partition, key, intier, datatype="fft"), - pulser_files=[ - str(file).replace("par_pht", "par_tcm") - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="pulser_ids", - ) - ], - check_files=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="check", - ), - overwrite_files=get_overwrite_file( - "pht", - timestamp=part.get_timestamp( - pht_par_catalog, - partition, - key, - tier="pht", - ), - ), - wildcard_constraints: - channel=part.get_wildcard_constraints(partition, key), - params: - datatype="cal", - channel="{channel}" if key == "default" else key, - timestamp=part.get_timestamp( - pht_par_catalog, partition, key, tier="pht" - ), - output: - hit_pars=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="qc", - ) - ], - plot_file=[ - temp(file) - for file in part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="qc", - ) - ], - log: - part.get_log_file( - pht_par_catalog, - partition, - key, - "pht", - time, - name="par_pht_qc", - ), - group: - "par-pht" - resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 30, - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--save_path {output.hit_pars} " - "--plot_path {output.plot_file} " - "--overwrite_files {input.overwrite_files} " - "--pulser_files {input.pulser_files} " - "--fft_files {input.fft_files} " - "--cal_files {input.cal_files}" - - set_last_rule_name(workflow, f"{key}-{partition}-build_pht_qc") - - if key in qc_pht_rules: - qc_pht_rules[key].append(list(workflow.rules)[-1]) - else: - qc_pht_rules[key] = [list(workflow.rules)[-1]] - - -# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs -# This rule builds the a/e calibration using the calibration dsp files for the whole partition 
-rule build_pht_qc: - input: - cal_files=os.path.join( - filelist_path(config), - "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", - ), - fft_files=os.path.join( - filelist_path(config), - "all-{experiment}-{period}-{run}-fft-" + f"{intier}.filelist", - ), - pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - check_file=get_pattern_pars_tmp_channel(config, "pht", "check"), - overwrite_files=lambda wildcards: get_overwrite_file("pht", wildcards=wildcards), - params: - datatype="cal", - channel="{channel}", - timestamp="{timestamp}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "qc")), - plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "qc")), - log: - get_pattern_log_channel(config, "par_pht_qc", time), - group: - "par-pht" - resources: - mem_swap=60, - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--save_path {output.hit_pars} " - "--plot_path {output.plot_file} " - "--overwrite_files {input.overwrite_files} " - "--pulser_files {input.pulser_files} " - "--fft_files {input.fft_files} " - "--cal_files {input.cal_files}" - - -fallback_qc_rule = list(workflow.rules)[-1] - -rule_order_list = [] -ordered = OrderedDict(qc_pht_rules) -ordered.move_to_end("default") -for key, items in ordered.items(): - rule_order_list += [item.name for item in items] -rule_order_list.append(fallback_qc_rule.name) -workflow._ruleorder.add(*rule_order_list) # [::-1] - - -# This rule builds the energy calibration using the calibration dsp files -rule build_per_energy_calibration: - input: - files=os.path.join( - filelist_path(config), - "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", - ), - pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - pht_dict=get_pattern_pars_tmp_channel(config, "pht", "qc"), - inplots=get_pattern_plts_tmp_channel(config, "pht", "qc"), - ctc_dict=ancient( - lambda wildcards: ParsCatalog.get_par_file( - config, wildcards.timestamp, intier - ) - ), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - tier="pht", - output: - ecal_file=temp(get_pattern_pars_tmp_channel(config, "pht", "energy_cal")), - results_file=temp( - get_pattern_pars_tmp_channel( - config, "pht", "energy_cal_objects", extension="pkl" - ) - ), - plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "energy_cal")), - log: - get_pattern_log_channel(config, "par_pht_energy_cal", time), - group: - "par-pht" - resources: - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' - "--log {log} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--configs {configs} " - "--tier {params.tier} " - "--metadata {meta} " - "--plot_path {output.plot_file} " - "--results_path {output.results_file} " - "--save_path {output.ecal_file} " - "--inplot_dict {input.inplots} " - "--in_hit_dict {input.pht_dict} " - "--ctc_dict {input.ctc_dict} " - "--pulser_file {input.pulser} " - "--files {input.files}" - - -part_pht_rules = {} -for key, dataset in part.datasets.items(): - for partition in dataset.keys(): - - rule: - input: - files=part.get_filelists(partition, key, intier), - pulser_files=[ - str(file).replace("par_pht", "par_tcm") - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="pulser_ids", - 
) - ], - ecal_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="energy_cal", - ), - eres_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="energy_cal_objects", - extension="pkl", - ), - inplots=part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="energy_cal", - ), - wildcard_constraints: - channel=part.get_wildcard_constraints(partition, key), - params: - datatype="cal", - channel="{channel}" if key == "default" else key, - timestamp=part.get_timestamp( - pht_par_catalog, partition, key, tier="pht" - ), - output: - hit_pars=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal", - ) - ], - partcal_results=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal_objects", - extension="pkl", - ) - ], - plot_file=[ - temp(file) - for file in part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal", - ) - ], - log: - part.get_log_file( - pht_par_catalog, - partition, - key, - "pht", - time, - name="par_pht_partcal", - ), - group: - "par-pht" - resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 15, - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' - "--log {log} " - "--configs {configs} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--metadata {meta} " - "--fit_results {output.partcal_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" - - set_last_rule_name( - workflow, f"{key}-{partition}-build_pht_energy_super_calibrations" - ) - - if key in part_pht_rules: - part_pht_rules[key].append(list(workflow.rules)[-1]) - else: - part_pht_rules[key] = [list(workflow.rules)[-1]] - - -# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs -# This rule builds the a/e calibration using the calibration dsp files for the whole partition -rule build_pht_energy_super_calibrations: - input: - files=os.path.join( - filelist_path(config), - "all-{experiment}-{period}-{run}-cal" + f"-{intier}.filelist", - ), - pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(config, "pht", "energy_cal"), - eres_file=get_pattern_pars_tmp_channel( - config, "pht", "energy_cal_objects", extension="pkl" - ), - inplots=get_pattern_plts_tmp_channel(config, "pht", "energy_cal"), - params: - datatype="cal", - channel="{channel}", - timestamp="{timestamp}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "partcal")), - partcal_results=temp( - get_pattern_pars_tmp_channel( - config, "pht", "partcal_objects", extension="pkl" - ) - ), - plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "partcal")), - log: - get_pattern_log_channel(config, "par_pht_partcal", time), - group: - "par-pht" - resources: - mem_swap=60, - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' - "--log {log} " - "--configs {configs} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--metadata {meta} " - "--inplots {input.inplots} " - "--fit_results {output.partcal_results} " 
- "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" - - -fallback_pht_rule = list(workflow.rules)[-1] - -rule_order_list = [] -ordered = OrderedDict(part_pht_rules) -ordered.move_to_end("default") -for key, items in ordered.items(): - rule_order_list += [item.name for item in items] -rule_order_list.append(fallback_pht_rule.name) -workflow._ruleorder.add(*rule_order_list) # [::-1] - -part_pht_rules = {} -for key, dataset in part.datasets.items(): - for partition in dataset.keys(): - - rule: - input: - files=part.get_filelists(partition, key, intier), - pulser_files=[ - str(file).replace("par_pht", "par_tcm") - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="pulser_ids", - ) - ], - ecal_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal", - ), - eres_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal_objects", - extension="pkl", - ), - inplots=part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal", - ), - wildcard_constraints: - channel=part.get_wildcard_constraints(partition, key), - params: - datatype="cal", - channel="{channel}" if key == "default" else key, - timestamp=part.get_timestamp( - pht_par_catalog, partition, key, tier="pht" - ), - output: - hit_pars=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal", - ) - ], - aoe_results=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal_objects", - extension="pkl", - ) - ], - plot_file=[ - temp(file) - for file in part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal", - ) - ], - log: - part.get_log_file( - pht_par_catalog, - partition, - key, - "pht", - time, - name="par_pht_aoe", - ), - group: - "par-pht" - resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 15, - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--aoe_results {output.aoe_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" - - set_last_rule_name( - workflow, f"{key}-{partition}-build_pht_aoe_calibrations" - ) - - if key in part_pht_rules: - part_pht_rules[key].append(list(workflow.rules)[-1]) - else: - part_pht_rules[key] = [list(workflow.rules)[-1]] - - -# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs -# This rule builds the a/e calibration using the calibration dsp files for the whole partition -rule build_pht_aoe_calibrations: - input: - files=os.path.join( - filelist_path(config), - "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", - ), - pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(config, "pht", "partcal"), - eres_file=get_pattern_pars_tmp_channel( - config, "pht", "partcal_objects", extension="pkl" - ), - 
inplots=get_pattern_plts_tmp_channel(config, "pht", "partcal"), - params: - datatype="cal", - channel="{channel}", - timestamp="{timestamp}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "aoecal")), - aoe_results=temp( - get_pattern_pars_tmp_channel( - config, "pht", "aoecal_objects", extension="pkl" - ) - ), - plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "aoecal")), - log: - get_pattern_log_channel(config, "par_pht_aoe_cal", time), - group: - "par-pht" - resources: - mem_swap=60, - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--aoe_results {output.aoe_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" - - -fallback_pht_rule = list(workflow.rules)[-1] - -rule_order_list = [] -ordered = OrderedDict(part_pht_rules) -ordered.move_to_end("default") -for key, items in ordered.items(): - rule_order_list += [item.name for item in items] -rule_order_list.append(fallback_pht_rule.name) -workflow._ruleorder.add(*rule_order_list) # [::-1] - -part_pht_rules = {} -for key, dataset in part.datasets.items(): - for partition in dataset.keys(): - - rule: - input: - files=part.get_filelists(partition, key, intier), - pulser_files=[ - str(file).replace("par_pht", "par_tcm") - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="pulser_ids", - ) - ], - ecal_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal", - ), - eres_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal_objects", - extension="pkl", - ), - inplots=part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal", - ), - wildcard_constraints: - channel=part.get_wildcard_constraints(partition, key), - params: - datatype="cal", - channel="{channel}" if key == "default" else key, - timestamp=part.get_timestamp( - pht_par_catalog, partition, key, tier="pht" - ), - output: - hit_pars=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - ) - ], - lq_results=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="objects", - extension="pkl", - ) - ], - plot_file=[ - temp(file) - for file in part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - ) - ], - log: - part.get_log_file( - pht_par_catalog, - partition, - key, - "pht", - time, - name="par_pht_lq", - ), - group: - "par-pht" - resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 15, - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--lq_results {output.lq_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" - - set_last_rule_name(workflow, 
f"{key}-{partition}-build_pht_lq_calibration") - - if key in part_pht_rules: - part_pht_rules[key].append(list(workflow.rules)[-1]) - else: - part_pht_rules[key] = [list(workflow.rules)[-1]] - - -# This rule builds the lq calibration using the calibration dsp files for the whole partition -rule build_pht_lq_calibration: - input: - files=os.path.join( - filelist_path(config), - "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", - ), - pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(config, "pht", "aoecal"), - eres_file=get_pattern_pars_tmp_channel( - config, "pht", "aoecal_objects", extension="pkl" - ), - inplots=get_pattern_plts_tmp_channel(config, "pht", "aoecal"), - params: - datatype="cal", - channel="{channel}", - timestamp="{timestamp}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht")), - lq_results=temp( - get_pattern_pars_tmp_channel(config, "pht", "objects", extension="pkl") - ), - plot_file=temp(get_pattern_plts_tmp_channel(config, "pht")), - log: - get_pattern_log_channel(config, "par_pht_lq_cal", time), - group: - "par-pht" - resources: - mem_swap=60, - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--lq_results {output.lq_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" - - -fallback_pht_rule = list(workflow.rules)[-1] - -rule_order_list = [] -ordered = OrderedDict(part_pht_rules) -ordered.move_to_end("default") -for key, items in ordered.items(): - rule_order_list += [item.name for item in items] -rule_order_list.append(fallback_pht_rule.name) -workflow._ruleorder.add(*rule_order_list) # [::-1] - - include: "channel_merge.smk" diff --git a/workflow/rules/pht_pars_geds.smk b/workflow/rules/pht_pars_geds.smk new file mode 100644 index 0000000..4e5e126 --- /dev/null +++ b/workflow/rules/pht_pars_geds.smk @@ -0,0 +1,768 @@ +""" +Snakemake rules for processing pht (partition hit) tier data. 
This is done in 4 steps: +- extraction of calibration curves(s) for each run for each channel from cal data +- extraction of psd calibration parameters and partition level energy fitting for each channel over whole partition from cal data +- combining of all channels into single pars files with associated plot and results files +- running build hit over all channels using par file +""" + +from legenddataflow.pars_loading import ParsCatalog +from legenddataflow.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from legenddataflow.utils import filelist_path, set_last_rule_name +from legenddataflow.patterns import ( + get_pattern_pars_tmp_channel, + get_pattern_plts_tmp_channel, + get_pattern_log_channel, + get_pattern_plts, + get_pattern_tier, + get_pattern_pars_tmp, + get_pattern_log, + get_pattern_pars, +) +from legenddataflow.execenv import execenv_smk_py_script + +pht_par_catalog = ParsKeyResolve.get_par_catalog( + ["-*-*-*-cal"], + get_pattern_tier(config, "raw", check_in_cycle=False), + {"cal": ["par_pht"], "lar": ["par_pht"]}, +) + +intier = "psp" + +qc_pht_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + cal_files=part.get_filelists(partition, key, intier), + fft_files=part.get_filelists(partition, key, intier, datatype="fft"), + pulser_files=[ + str(file).replace("par_pht", "par_tcm") + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], + overwrite_files=get_overwrite_file( + "pht", + timestamp=part.get_timestamp( + pht_par_catalog, + partition, + key, + tier="pht", + ), + ), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + pht_par_catalog, partition, key, tier="pht" + ), + output: + hit_pars=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="qc", + ) + ], + plot_file=[ + temp(file) + for file in part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="qc", + ) + ], + log: + part.get_log_file( + pht_par_catalog, + partition, + key, + "pht", + time, + name="par_pht_qc", + ), + group: + "par-pht" + resources: + mem_swap=len(part.get_filelists(partition, key, intier)) * 30, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--save_path {output.hit_pars} " + "--plot_path {output.plot_file} " + "--overwrite_files {input.overwrite_files} " + "--pulser_files {input.pulser_files} " + "--fft_files {input.fft_files} " + "--cal_files {input.cal_files}" + + set_last_rule_name(workflow, f"{key}-{partition}-build_pht_qc") + + if key in qc_pht_rules: + qc_pht_rules[key].append(list(workflow.rules)[-1]) + else: + qc_pht_rules[key] = [list(workflow.rules)[-1]] + + +# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs +# This rule builds the a/e calibration using the calibration dsp files for the whole partition +rule build_pht_qc: + input: + cal_files=os.path.join( + filelist_path(config), + "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", + ), + fft_files=os.path.join( + filelist_path(config), + "all-{experiment}-{period}-{run}-fft-" + f"{intier}.filelist", + ), + 
pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + overwrite_files=lambda wildcards: get_overwrite_file("pht", wildcards=wildcards), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "qc")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "qc")), + log: + get_pattern_log_channel(config, "par_pht_qc", time), + group: + "par-pht" + resources: + mem_swap=60, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--save_path {output.hit_pars} " + "--plot_path {output.plot_file} " + "--overwrite_files {input.overwrite_files} " + "--pulser_files {input.pulser_files} " + "--fft_files {input.fft_files} " + "--cal_files {input.cal_files}" + + +fallback_qc_rule = list(workflow.rules)[-1] + +rule_order_list = [] +ordered = OrderedDict(qc_pht_rules) +ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_qc_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] + + +# This rule builds the energy calibration using the calibration dsp files +rule build_per_energy_calibration: + input: + files=os.path.join( + filelist_path(config), + "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", + ), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + pht_dict=get_pattern_pars_tmp_channel(config, "pht", "qc"), + inplots=get_pattern_plts_tmp_channel(config, "pht", "qc"), + ctc_dict=ancient( + lambda wildcards: ParsCatalog.get_par_file( + config, wildcards.timestamp, intier + ) + ), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + tier="pht", + output: + ecal_file=temp(get_pattern_pars_tmp_channel(config, "pht", "energy_cal")), + results_file=temp( + get_pattern_pars_tmp_channel( + config, "pht", "energy_cal_objects", extension="pkl" + ) + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "energy_cal")), + log: + get_pattern_log_channel(config, "par_pht_energy_cal", time), + group: + "par-pht" + resources: + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--configs {configs} " + "--tier {params.tier} " + "--metadata {meta} " + "--plot_path {output.plot_file} " + "--results_path {output.results_file} " + "--save_path {output.ecal_file} " + "--inplot_dict {input.inplots} " + "--in_hit_dict {input.pht_dict} " + "--ctc_dict {input.ctc_dict} " + "--pulser_file {input.pulser} " + "--files {input.files}" + + +part_pht_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + files=part.get_filelists(partition, key, intier), + pulser_files=[ + str(file).replace("par_pht", "par_tcm") + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], + ecal_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="energy_cal", + ), + eres_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="energy_cal_objects", + extension="pkl", + ), + inplots=part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + 
name="energy_cal", + ), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + pht_par_catalog, partition, key, tier="pht" + ), + output: + hit_pars=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal", + ) + ], + partcal_results=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal_objects", + extension="pkl", + ) + ], + plot_file=[ + temp(file) + for file in part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal", + ) + ], + log: + part.get_log_file( + pht_par_catalog, + partition, + key, + "pht", + time, + name="par_pht_partcal", + ), + group: + "par-pht" + resources: + mem_swap=len(part.get_filelists(partition, key, intier)) * 15, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--metadata {meta} " + "--fit_results {output.partcal_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + set_last_rule_name( + workflow, f"{key}-{partition}-build_pht_energy_super_calibrations" + ) + + if key in part_pht_rules: + part_pht_rules[key].append(list(workflow.rules)[-1]) + else: + part_pht_rules[key] = [list(workflow.rules)[-1]] + + +# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs +# This rule builds the a/e calibration using the calibration dsp files for the whole partition +rule build_pht_energy_super_calibrations: + input: + files=os.path.join( + filelist_path(config), + "all-{experiment}-{period}-{run}-cal" + f"-{intier}.filelist", + ), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "energy_cal"), + eres_file=get_pattern_pars_tmp_channel( + config, "pht", "energy_cal_objects", extension="pkl" + ), + inplots=get_pattern_plts_tmp_channel(config, "pht", "energy_cal"), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "partcal")), + partcal_results=temp( + get_pattern_pars_tmp_channel( + config, "pht", "partcal_objects", extension="pkl" + ) + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "partcal")), + log: + get_pattern_log_channel(config, "par_pht_partcal", time), + group: + "par-pht" + resources: + mem_swap=60, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--metadata {meta} " + "--inplots {input.inplots} " + "--fit_results {output.partcal_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + +fallback_pht_rule = list(workflow.rules)[-1] + +rule_order_list = [] +ordered = OrderedDict(part_pht_rules) 
+ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_pht_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] + +part_pht_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + files=part.get_filelists(partition, key, intier), + pulser_files=[ + str(file).replace("par_pht", "par_tcm") + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], + ecal_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal", + ), + eres_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal_objects", + extension="pkl", + ), + inplots=part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal", + ), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + pht_par_catalog, partition, key, tier="pht" + ), + output: + hit_pars=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal", + ) + ], + aoe_results=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal_objects", + extension="pkl", + ) + ], + plot_file=[ + temp(file) + for file in part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal", + ) + ], + log: + part.get_log_file( + pht_par_catalog, + partition, + key, + "pht", + time, + name="par_pht_aoe", + ), + group: + "par-pht" + resources: + mem_swap=len(part.get_filelists(partition, key, intier)) * 15, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--aoe_results {output.aoe_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + set_last_rule_name( + workflow, f"{key}-{partition}-build_pht_aoe_calibrations" + ) + + if key in part_pht_rules: + part_pht_rules[key].append(list(workflow.rules)[-1]) + else: + part_pht_rules[key] = [list(workflow.rules)[-1]] + + +# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs +# This rule builds the a/e calibration using the calibration dsp files for the whole partition +rule build_pht_aoe_calibrations: + input: + files=os.path.join( + filelist_path(config), + "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", + ), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "partcal"), + eres_file=get_pattern_pars_tmp_channel( + config, "pht", "partcal_objects", extension="pkl" + ), + inplots=get_pattern_plts_tmp_channel(config, "pht", "partcal"), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "aoecal")), + aoe_results=temp( + get_pattern_pars_tmp_channel( + config, "pht", "aoecal_objects", extension="pkl" + ) + ), + 
plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "aoecal")), + log: + get_pattern_log_channel(config, "par_pht_aoe_cal", time), + group: + "par-pht" + resources: + mem_swap=60, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--aoe_results {output.aoe_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + +fallback_pht_rule = list(workflow.rules)[-1] + +rule_order_list = [] +ordered = OrderedDict(part_pht_rules) +ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_pht_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] + +part_pht_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + files=part.get_filelists(partition, key, intier), + pulser_files=[ + str(file).replace("par_pht", "par_tcm") + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], + ecal_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal", + ), + eres_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal_objects", + extension="pkl", + ), + inplots=part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal", + ), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + pht_par_catalog, partition, key, tier="pht" + ), + output: + hit_pars=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + ) + ], + lq_results=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="objects", + extension="pkl", + ) + ], + plot_file=[ + temp(file) + for file in part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + ) + ], + log: + part.get_log_file( + pht_par_catalog, + partition, + key, + "pht", + time, + name="par_pht_lq", + ), + group: + "par-pht" + resources: + mem_swap=len(part.get_filelists(partition, key, intier)) * 15, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--lq_results {output.lq_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + set_last_rule_name(workflow, f"{key}-{partition}-build_pht_lq_calibration") + + if key in part_pht_rules: + part_pht_rules[key].append(list(workflow.rules)[-1]) + else: + part_pht_rules[key] = [list(workflow.rules)[-1]] + + +# This rule builds the lq calibration using the calibration dsp files for the whole partition +rule build_pht_lq_calibration: + input: + files=os.path.join( + filelist_path(config), 
+ "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", + ), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "aoecal"), + eres_file=get_pattern_pars_tmp_channel( + config, "pht", "aoecal_objects", extension="pkl" + ), + inplots=get_pattern_plts_tmp_channel(config, "pht", "aoecal"), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht")), + lq_results=temp( + get_pattern_pars_tmp_channel(config, "pht", "objects", extension="pkl") + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht")), + log: + get_pattern_log_channel(config, "par_pht_lq_cal", time), + group: + "par-pht" + resources: + mem_swap=60, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--lq_results {output.lq_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + +fallback_pht_rule = list(workflow.rules)[-1] + +rule_order_list = [] +ordered = OrderedDict(part_pht_rules) +ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_pht_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] diff --git a/workflow/rules/pht_fast.smk b/workflow/rules/pht_pars_geds_fast.smk similarity index 100% rename from workflow/rules/pht_fast.smk rename to workflow/rules/pht_pars_geds_fast.smk diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index ab2e70f..e264ca4 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -22,17 +22,11 @@ psp_par_catalog = ParsKeyResolve.get_par_catalog( {"cal": ["par_psp"], "lar": ["par_psp"]}, ) -psp_par_cat_file = Path(pars_path(config)) / "psp" / "validity.yaml" -if psp_par_cat_file.is_file(): - psp_par_cat_file.unlink() -Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) -ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) - include: "channel_merge.smk" -build_merge_rules("psp", lh5_merge=True) +build_merge_rules("psp", lh5_merge=True, lh5_tier="dsp") rule build_psp: diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk index 10d9ab1..8d53220 100644 --- a/workflow/rules/psp_pars_geds.smk +++ b/workflow/rules/psp_pars_geds.smk @@ -4,8 +4,8 @@ Snakemake rules for processing psp (partition dsp) tier data. 
- extraction of psd calibration parameters and partition level energy fitting for each channel over whole partition from cal data """ -from legenddataflow.create_pars_keylist import ParsKeyResolve from legenddataflow.utils import set_last_rule_name +from legenddataflow.create_pars_keylist import ParsKeyResolve from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -22,6 +22,7 @@ psp_par_catalog = ParsKeyResolve.get_par_catalog( {"cal": ["par_psp"], "lar": ["par_psp"]}, ) + psp_rules = {} for key, dataset in part.datasets.items(): for partition in dataset.keys(): @@ -119,7 +120,7 @@ for key, dataset in part.datasets.items(): # Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs # This rule builds the a/e calibration using the calibration dsp files for the whole partition -rule build_par_psp: +rule build_par_psp_fallback: input: dsp_pars=get_pattern_pars_tmp_channel(config, "dsp", "eopt"), dsp_objs=get_pattern_pars_tmp_channel(config, "dsp", "objects", extension="pkl"), From 4dcdf97cd2f4834d6f445996a51dc8d8daf12898 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 20:20:14 +0100 Subject: [PATCH 091/101] test try 2 --- tests/test_util.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_util.py b/tests/test_util.py index 4041614..53b1b00 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -14,7 +14,6 @@ with (testprod / "config.yaml").open() as r: setup = yaml.safe_load(r) subst_vars(setup, var_values={"_": str(testprod)}) -setup = setup["setups"]["test"] def test_util(): From b58601d75bc28216b414bc696f7eb55c96e5f08e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 20:39:06 +0100 Subject: [PATCH 092/101] tests v3 --- tests/dummy_cycle/config.yaml | 2 +- tests/test_util.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/dummy_cycle/config.yaml b/tests/dummy_cycle/config.yaml index a40938d..97de306 100644 --- a/tests/dummy_cycle/config.yaml +++ b/tests/dummy_cycle/config.yaml @@ -1,6 +1,6 @@ paths: sandbox_path: "" - tier_daq: $_/generated/tier/daq + tier_daq: $_/input_data/tier/daq tier_raw_blind: "" workflow: $_/workflow diff --git a/tests/test_util.py b/tests/test_util.py index 53b1b00..9d3c424 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,3 +1,4 @@ +from datetime import datetime from pathlib import Path import yaml @@ -18,7 +19,10 @@ def test_util(): assert utils.tier_path(setup) == str(testprod / "generated/tier") - assert utils.unix_time("20230101T123456Z") == 1672572896.0 + time = datetime.now() + assert int(utils.unix_time(time.strftime("%Y%m%dT%H%M%SZ"))) == int( + time.timestamp() + ) def test_filekey(): @@ -41,7 +45,7 @@ def test_filekey(): assert ( FileKey.get_filekey_from_pattern( key.get_path_from_filekey(patterns.get_pattern_tier(setup, "dsp"))[0], - utils.get_tier_path(setup, "dsp"), + patterns.get_pattern_tier(setup, "dsp"), ).name == key.name ) @@ -70,9 +74,10 @@ def test_create_pars_keylist(): "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.yaml", "lar/p00/r000/l200-p00-r000-lar-20230102T123456Z-par_dsp.yaml", } - keylist = sorted( - ParsKeyResolve.get_keys("-*-*-*-cal", patterns.get_pattern_tier_daq(setup)), + ParsKeyResolve.get_keys( + "-*-*-*-cal", patterns.get_pattern_tier_daq(setup, extension="*") + ), key=FileKey.get_unix_timestamp, ) assert keylist == [ From ed1586d4a1ec90d051005c6422e9fb574ec97aa4 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 4 Feb 2025 
23:21:44 +0100 Subject: [PATCH 093/101] use dashes not underscores in executable names --- pyproject.toml | 64 ++++++++++++------------- workflow/rules/ann.smk | 4 +- workflow/rules/blinding_calibration.smk | 6 +-- workflow/rules/blinding_check.smk | 6 +-- workflow/rules/chanlist_gen.smk | 2 +- workflow/rules/channel_merge.smk | 8 ++-- workflow/rules/dsp.smk | 2 +- workflow/rules/dsp_pars_geds.smk | 14 +++--- workflow/rules/evt.smk | 4 +- workflow/rules/hit.smk | 2 +- workflow/rules/hit_pars_geds.smk | 8 ++-- workflow/rules/pht.smk | 2 +- workflow/rules/pht_pars_geds.smk | 18 +++---- workflow/rules/pht_pars_geds_fast.smk | 4 +- workflow/rules/psp.smk | 2 +- workflow/rules/psp_pars_geds.smk | 8 ++-- workflow/rules/qc_phy.smk | 8 ++-- workflow/rules/raw.smk | 6 +-- workflow/rules/skm.smk | 2 +- workflow/rules/tcm.smk | 4 +- 20 files changed, 87 insertions(+), 87 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 86f7d5b..3aae00f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,38 +83,38 @@ docs = [ ] [project.scripts] -dataprod = "legenddataflow.execenv:dataprod" -create_chankeylist = "legenddataflow.scripts.create_chankeylist:create_chankeylist" -merge_channels = "legenddataflow.scripts.merge_channels:merge_channels" -build_filedb = "legenddataflow.scripts.build_filedb:build_filedb" -build_tier_dsp = "legenddataflow.scripts.tier.dsp:build_tier_dsp" -build_tier_evt = "legenddataflow.scripts.tier.evt:build_tier_evt" -build_tier_hit = "legenddataflow.scripts.tier.hit:build_tier_hit" -build_tier_raw_blind = "legenddataflow.scripts.tier.raw_blind:build_tier_raw_blind" -build_tier_raw_fcio = "legenddataflow.scripts.tier.raw_fcio:build_tier_raw_fcio" -build_tier_raw_orca = "legenddataflow.scripts.tier.raw_orca:build_tier_raw_orca" -build_tier_skm = "legenddataflow.scripts.tier.skm:build_tier_skm" -build_tier_tcm = "legenddataflow.scripts.tier.tcm:build_tier_tcm" -par_geds_dsp_dplms = "legenddataflow.scripts.par.geds.dsp.dplms:par_geds_dsp_dplms" -par_geds_dsp_eopt = "legenddataflow.scripts.par.geds.dsp.eopt:par_geds_dsp_eopt" -par_geds_dsp_evtsel = "legenddataflow.scripts.par.geds.dsp.evtsel:par_geds_dsp_evtsel" -par_geds_dsp_nopt = "legenddataflow.scripts.par.geds.dsp.nopt:par_geds_dsp_nopt" -par_geds_dsp_svm_build = "legenddataflow.scripts.par.geds.dsp.svm_build:par_geds_dsp_svm_build" -par_geds_dsp_svm = "legenddataflow.scripts.par.geds.dsp.svm:par_geds_dsp_svm" -par_geds_dsp_tau = "legenddataflow.scripts.par.geds.dsp.tau:par_geds_dsp_tau" -par_geds_hit_aoe = "legenddataflow.scripts.par.geds.hit.aoe:par_geds_hit_aoe" -par_geds_hit_ecal = "legenddataflow.scripts.par.geds.hit.ecal:par_geds_hit_ecal" -par_geds_hit_lq = "legenddataflow.scripts.par.geds.hit.lq:par_geds_hit_lq" -par_geds_hit_qc = "legenddataflow.scripts.par.geds.hit.qc:par_geds_hit_qc" -par_geds_pht_aoe = "legenddataflow.scripts.par.geds.pht.aoe:par_geds_pht_aoe" -par_geds_pht_ecal_part = "legenddataflow.scripts.par.geds.pht.ecal_part:par_geds_pht_ecal_part" -par_geds_pht_fast = "legenddataflow.scripts.par.geds.pht.fast:par_geds_pht_fast" -par_geds_pht_qc_phy = "legenddataflow.scripts.par.geds.pht.qc_phy:par_geds_pht_qc_phy" -par_geds_pht_qc = "legenddataflow.scripts.par.geds.pht.qc:par_geds_pht_qc" -par_geds_psp_average = "legenddataflow.scripts.par.geds.psp.average:par_geds_psp_average" -par_geds_raw_blindcal = "legenddataflow.scripts.par.geds.raw.blindcal:par_geds_raw_blindcal" -par_geds_raw_blindcheck = "legenddataflow.scripts.par.geds.raw.blindcheck:par_geds_raw_blindcheck" -par_geds_tcm_pulser = 
"legenddataflow.scripts.par.geds.raw.tcm.pulser:par_geds_raw_pulser" +dataprod = "legenddataflow.execenv:dataprod" +create-chankeylist = "legenddataflow.scripts.create_chankeylist:create_chankeylist" +merge-channels = "legenddataflow.scripts.merge_channels:merge_channels" +build-filedb = "legenddataflow.scripts.build_filedb:build_filedb" +build-tier-dsp = "legenddataflow.scripts.tier.dsp:build_tier_dsp" +build-tier-evt = "legenddataflow.scripts.tier.evt:build_tier_evt" +build-tier-hit = "legenddataflow.scripts.tier.hit:build_tier_hit" +build-tier-raw-blind = "legenddataflow.scripts.tier.raw_blind:build_tier_raw_blind" +build-tier-raw-fcio = "legenddataflow.scripts.tier.raw_fcio:build_tier_raw_fcio" +build-tier-raw-orca = "legenddataflow.scripts.tier.raw_orca:build_tier_raw_orca" +build-tier-skm = "legenddataflow.scripts.tier.skm:build_tier_skm" +build-tier-tcm = "legenddataflow.scripts.tier.tcm:build_tier_tcm" +par-geds-dsp-dplms = "legenddataflow.scripts.par.geds.dsp.dplms:par_geds_dsp_dplms" +par-geds-dsp-eopt = "legenddataflow.scripts.par.geds.dsp.eopt:par_geds_dsp_eopt" +par-geds-dsp-evtsel = "legenddataflow.scripts.par.geds.dsp.evtsel:par_geds_dsp_evtsel" +par-geds-dsp-nopt = "legenddataflow.scripts.par.geds.dsp.nopt:par_geds_dsp_nopt" +par-geds-dsp-svm-build = "legenddataflow.scripts.par.geds.dsp.svm_build:par_geds_dsp_svm_build" +par-geds-dsp-svm = "legenddataflow.scripts.par.geds.dsp.svm:par_geds_dsp_svm" +par-geds-dsp-tau = "legenddataflow.scripts.par.geds.dsp.tau:par_geds_dsp_tau" +par-geds-hit-aoe = "legenddataflow.scripts.par.geds.hit.aoe:par_geds_hit_aoe" +par-geds-hit-ecal = "legenddataflow.scripts.par.geds.hit.ecal:par_geds_hit_ecal" +par-geds-hit-lq = "legenddataflow.scripts.par.geds.hit.lq:par_geds_hit_lq" +par-geds-hit-qc = "legenddataflow.scripts.par.geds.hit.qc:par_geds_hit_qc" +par-geds-pht-aoe = "legenddataflow.scripts.par.geds.pht.aoe:par_geds_pht_aoe" +par-geds-pht-ecal-part = "legenddataflow.scripts.par.geds.pht.ecal_part:par_geds_pht_ecal_part" +par-geds-pht-fast = "legenddataflow.scripts.par.geds.pht.fast:par_geds_pht_fast" +par-geds-pht-qc-phy = "legenddataflow.scripts.par.geds.pht.qc_phy:par_geds_pht_qc_phy" +par-geds-pht-qc = "legenddataflow.scripts.par.geds.pht.qc:par_geds_pht_qc" +par-geds-psp-average = "legenddataflow.scripts.par.geds.psp.average:par_geds_psp_average" +par-geds-raw-blindcal = "legenddataflow.scripts.par.geds.raw.blindcal:par_geds_raw_blindcal" +par-geds-raw-blindcheck = "legenddataflow.scripts.par.geds.raw.blindcheck:par_geds_raw_blindcheck" +par-geds-tcm-pulser = "legenddataflow.scripts.par.geds.raw.tcm.pulser:par_geds_raw_pulser" [tool.uv.workspace] exclude = ["generated", "inputs", "software", "workflow"] diff --git a/workflow/rules/ann.smk b/workflow/rules/ann.smk index 8e7429f..b1f5edf 100644 --- a/workflow/rules/ann.smk +++ b/workflow/rules/ann.smk @@ -30,7 +30,7 @@ rule build_ann: runtime=300, mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: - f'{execenv_smk_py_script(config, "build_tier_dsp")}' + f'{execenv_smk_py_script(config, "build-tier-dsp")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -61,7 +61,7 @@ rule build_pan: runtime=300, mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: - f'{execenv_smk_py_script(config, "build_tier_dsp")}' + f'{execenv_smk_py_script(config, "build-tier-dsp")}' "--log {log} " "--configs {configs} " "--metadata {meta} " diff --git a/workflow/rules/blinding_calibration.smk b/workflow/rules/blinding_calibration.smk index 
1a69313..fce7b11 100644 --- a/workflow/rules/blinding_calibration.smk +++ b/workflow/rules/blinding_calibration.smk @@ -38,7 +38,7 @@ rule build_blinding_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_raw_blindcal")}' + f'{execenv_smk_py_script(config, "par-geds-raw-blindcal")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -66,7 +66,7 @@ rule build_plts_blinding: group: "merge-blindcal" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--input {input} " "--output {output} " @@ -88,6 +88,6 @@ rule build_pars_blinding: group: "merge-blindcal" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--input {input.infiles} " "--output {output} " diff --git a/workflow/rules/blinding_check.smk b/workflow/rules/blinding_check.smk index bd9b796..b5ec5b4 100644 --- a/workflow/rules/blinding_check.smk +++ b/workflow/rules/blinding_check.smk @@ -39,7 +39,7 @@ rule build_blinding_check: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_raw_blindcheck")}' + f'{execenv_smk_py_script(config, "par-geds-raw-blindcheck")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -67,7 +67,7 @@ rule build_plts_raw: group: "merge-raw" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--input {input} " "--output {output} " @@ -91,4 +91,4 @@ rule build_pars_raw: group: "merge-raw" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' diff --git a/workflow/rules/chanlist_gen.smk b/workflow/rules/chanlist_gen.smk index b6a3ea8..abee65a 100644 --- a/workflow/rules/chanlist_gen.smk +++ b/workflow/rules/chanlist_gen.smk @@ -23,7 +23,7 @@ def get_chanlist(setup, keypart, workflow, config, det_status, chan_maps): f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = execenv_smk_py_script(config, "create_chankeylist") + cmd = execenv_smk_py_script(config, "create-chankeylist") cmd += f" --det_status {det_status} --channelmap {chan_maps} --timestamp {key.timestamp} " cmd += f"--datatype cal --output_file {output_file}" os.system(cmd) diff --git a/workflow/rules/channel_merge.smk b/workflow/rules/channel_merge.smk index b970840..8ba185d 100644 --- a/workflow/rules/channel_merge.smk +++ b/workflow/rules/channel_merge.smk @@ -31,7 +31,7 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): group: f"merge-{tier}" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--input {input} " "--output {output} " "--channelmap {meta} " @@ -64,7 +64,7 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): group: f"merge-{tier}" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -97,7 +97,7 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): group: f"merge-{tier}" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -144,7 +144,7 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): 
f"merge-{tier}" run: shell_string = ( - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--output {output.out_file} " "--input {input.in_files} " "--timestamp {params.timestamp} " diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index f296716..9acf3ae 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -52,7 +52,7 @@ rule build_dsp: runtime=300, mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, shell: - f'{execenv_smk_py_script(config, "build_tier_dsp")}' + f'{execenv_smk_py_script(config, "build-tier-dsp")}' "--log {log} " "--tier dsp " f"--configs {ro(configs)} " diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index 86b8342..2dc6d47 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -36,7 +36,7 @@ rule build_pars_dsp_tau_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_tau")}' + f'{execenv_smk_py_script(config, "par-geds-dsp-tau")}' "--configs {configs} " "--log {log} " "--datatype {params.datatype} " @@ -70,7 +70,7 @@ rule build_pars_evtsel_geds: runtime=300, mem_swap=70, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_evtsel")}' + f'{execenv_smk_py_script(config, "par-geds-dsp-evtsel")}' "--configs {configs} " "--log {log} " "--datatype {params.datatype} " @@ -107,7 +107,7 @@ rule build_pars_dsp_nopt_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_nopt")}' + f'{execenv_smk_py_script(config, "par-geds-dsp-nopt")}' "--database {input.database} " "--configs {configs} " "--log {log} " @@ -144,7 +144,7 @@ rule build_pars_dsp_dplms_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_dplms")}' + f'{execenv_smk_py_script(config, "par-geds-dsp-dplms")}' "--fft_raw_filelist {input.fft_files} " "--peak_file {input.peak_file} " "--database {input.database} " @@ -182,7 +182,7 @@ rule build_pars_dsp_eopt_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_eopt")}' + f'{execenv_smk_py_script(config, "par-geds-dsp-eopt")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -213,7 +213,7 @@ rule build_svm_dsp_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_svm_build")}' + f'{execenv_smk_py_script(config, "par-geds-dsp-svm-build")}' "--log {log} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " @@ -233,7 +233,7 @@ rule build_pars_dsp_svm_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_svm")}' + f'{execenv_smk_py_script(config, "par-geds-dsp-svm")}' "--log {log} " "--input_file {input.dsp_pars} " "--output_file {output.dsp_pars} " diff --git a/workflow/rules/evt.smk b/workflow/rules/evt.smk index 1bcb2a4..260fc31 100644 --- a/workflow/rules/evt.smk +++ b/workflow/rules/evt.smk @@ -44,7 +44,7 @@ rule build_evt: mem_swap=50, run: shell_string = ( - f'{execenv_smk_py_script(config, "build_tier_evt")}' + f'{execenv_smk_py_script(config, "build-tier-evt")}' f"--configs {ro(configs)} " f"--metadata {ro(meta)} " "--log {log} " @@ -96,7 +96,7 @@ rule build_pet: mem_swap=50, run: shell_string = ( - f'{execenv_smk_py_script(config, "build_tier_evt")}' + f'{execenv_smk_py_script(config, "build-tier-evt")}' f"--configs {ro(configs)} " f"--metadata {ro(meta)} " "--log {log} " diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index 5d83174..a6cf3c0 100644 --- 
a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -50,7 +50,7 @@ rule build_hit: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "build_tier_hit")}' + f'{execenv_smk_py_script(config, "build-tier-hit")}' f"--configs {ro(configs)} " "--metadata {meta} " "--log {log} " diff --git a/workflow/rules/hit_pars_geds.smk b/workflow/rules/hit_pars_geds.smk index 8143f82..7db1fcc 100644 --- a/workflow/rules/hit_pars_geds.smk +++ b/workflow/rules/hit_pars_geds.smk @@ -46,7 +46,7 @@ rule build_qc: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_hit_qc")}' + f'{execenv_smk_py_script(config, "par-geds-hit-qc")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -94,7 +94,7 @@ rule build_energy_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' + f'{execenv_smk_py_script(config, "par-geds-hit-ecal")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -142,7 +142,7 @@ rule build_aoe_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_hit_aoe")}' + f'{execenv_smk_py_script(config, "par-geds-hit-aoe")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -188,7 +188,7 @@ rule build_lq_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_hit_lq")}' + f'{execenv_smk_py_script(config, "par-geds-hit-lq")}' "--log {log} " "--configs {configs} " "--metadata {meta} " diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index fa85971..402ab8d 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -53,7 +53,7 @@ rule build_pht: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "build_tier_hit")}' + f'{execenv_smk_py_script(config, "build-tier-hit")}' f"--configs {ro(configs)} " "--metadata {meta} " "--log {log} " diff --git a/workflow/rules/pht_pars_geds.smk b/workflow/rules/pht_pars_geds.smk index 4e5e126..50b6972 100644 --- a/workflow/rules/pht_pars_geds.smk +++ b/workflow/rules/pht_pars_geds.smk @@ -101,7 +101,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 30, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' + f'{execenv_smk_py_script(config, "par-geds-pht-qc")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -152,7 +152,7 @@ rule build_pht_qc: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' + f'{execenv_smk_py_script(config, "par-geds-pht-qc")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -213,7 +213,7 @@ rule build_per_energy_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' + f'{execenv_smk_py_script(config, "par-geds-hit-ecal")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -325,7 +325,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' + f'{execenv_smk_py_script(config, "par-geds-pht-ecal-part")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -385,7 +385,7 @@ rule build_pht_energy_super_calibrations: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' + f'{execenv_smk_py_script(config, "par-geds-pht-ecal-part")}' "--log {log} " "--configs {configs} " "--datatype 
{params.datatype} " @@ -506,7 +506,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' + f'{execenv_smk_py_script(config, "par-geds-pht-aoe")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -566,7 +566,7 @@ rule build_pht_aoe_calibrations: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' + f'{execenv_smk_py_script(config, "par-geds-pht-aoe")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -685,7 +685,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' + f'{execenv_smk_py_script(config, "par-geds-pht-lq")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -740,7 +740,7 @@ rule build_pht_lq_calibration: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' + f'{execenv_smk_py_script(config, "par-geds-pht-lq")}' "--log {log} " "--configs {configs} " "--metadata {meta} " diff --git a/workflow/rules/pht_pars_geds_fast.smk b/workflow/rules/pht_pars_geds_fast.smk index 2379753..26aca7e 100644 --- a/workflow/rules/pht_pars_geds_fast.smk +++ b/workflow/rules/pht_pars_geds_fast.smk @@ -106,7 +106,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 12, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_fast")}' + f'{execenv_smk_py_script(config, "par-geds-pht-fast")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -164,7 +164,7 @@ rule par_pht_fast: mem_swap=50, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_fast")}' + f'{execenv_smk_py_script(config, "par-geds-pht-fast")}' "--log {log} " "--configs {configs} " "--metadata {meta} " diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index e264ca4..1f6d36f 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -52,7 +52,7 @@ rule build_psp: runtime=300, mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, shell: - f'{execenv_smk_py_script(config, "build_tier_dsp")}' + f'{execenv_smk_py_script(config, "build-tier-dsp")}' "--log {log} " "--tier psp " f"--configs {ro(configs)} " diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk index 8d53220..8f6ee77 100644 --- a/workflow/rules/psp_pars_geds.smk +++ b/workflow/rules/psp_pars_geds.smk @@ -97,7 +97,7 @@ for key, dataset in part.datasets.items(): resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_psp_average")}' + f'{execenv_smk_py_script(config, "par-geds-psp-average")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -142,7 +142,7 @@ rule build_par_psp_fallback: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_psp_average")}' + f'{execenv_smk_py_script(config, "par-geds-psp-average")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -183,7 +183,7 @@ rule build_svm_psp: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_svm_build")}' + f'{execenv_smk_py_script(config, "par-geds-dsp-svm-build")}' "--log {log} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " @@ -203,7 +203,7 @@ rule build_pars_psp_svm: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_svm")}' + 
f'{execenv_smk_py_script(config, "par-geds-dsp-svm")}' "--log {log} " "--input_file {input.dsp_pars} " "--output_file {output.dsp_pars} " diff --git a/workflow/rules/qc_phy.smk b/workflow/rules/qc_phy.smk index a5cd954..8d6250e 100644 --- a/workflow/rules/qc_phy.smk +++ b/workflow/rules/qc_phy.smk @@ -67,7 +67,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 20, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_qc_phy")}' + f'{execenv_smk_py_script(config, "par-geds-pht-qc-phy")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -108,7 +108,7 @@ rule build_pht_qc_phy: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_qc_phy")}' + f'{execenv_smk_py_script(config, "par-geds-pht-qc-phy")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -146,7 +146,7 @@ rule build_plts_pht_phy: group: "merge-hit" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--input {input} " "--output {output} " @@ -168,6 +168,6 @@ rule build_pars_pht_phy: group: "merge-hit" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--input {input.infiles} " "--output {output} " diff --git a/workflow/rules/raw.smk b/workflow/rules/raw.smk index b0040fd..2411c14 100644 --- a/workflow/rules/raw.smk +++ b/workflow/rules/raw.smk @@ -40,7 +40,7 @@ rule build_raw_orca: mem_swap=110, runtime=300, shell: - f'{execenv_smk_py_script(config, "build_tier_raw_orca")}' + f'{execenv_smk_py_script(config, "build-tier-raw-orca")}' "--log {log} " f"--configs {ro(configs)} " f"--chan_maps {ro(chan_maps)} " @@ -69,7 +69,7 @@ rule build_raw_fcio: mem_swap=110, runtime=300, shell: - f'{execenv_smk_py_script(config, "build_tier_raw_fcio")}' + f'{execenv_smk_py_script(config, "build-tier-raw-fcio")}' "--log {log} " f"--configs {ro(configs)} " f"--chan_maps {ro(chan_maps)} " @@ -104,7 +104,7 @@ rule build_raw_blind: mem_swap=110, runtime=300, shell: - f'{execenv_smk_py_script(config, "build_tier_raw_blind")}' + f'{execenv_smk_py_script(config, "build-tier-raw-blind")}' "--log {log} " f"--configs {ro(configs)} " f"--chan_maps {ro(chan_maps)} " diff --git a/workflow/rules/skm.smk b/workflow/rules/skm.smk index 7a4a686..3f38c3b 100644 --- a/workflow/rules/skm.smk +++ b/workflow/rules/skm.smk @@ -27,7 +27,7 @@ rule build_skm: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "build_tier_skm")}' + f'{execenv_smk_py_script(config, "build-tier-skm")}' f"--configs {ro(configs)} " "--timestamp {params.timestamp} " "--log {log} " diff --git a/workflow/rules/tcm.smk b/workflow/rules/tcm.smk index afb080c..b954bf3 100644 --- a/workflow/rules/tcm.smk +++ b/workflow/rules/tcm.smk @@ -29,7 +29,7 @@ rule build_tier_tcm: runtime=300, mem_swap=20, shell: - f'{execenv_smk_py_script(config, "build_tier_tcm")}' + f'{execenv_smk_py_script(config, "build-tier-tcm")}' "--log {log} " f"--configs {ro(configs)} " "--datatype {params.datatype} " @@ -57,7 +57,7 @@ rule build_pulser_ids: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_tcm_pulser")}' + f'{execenv_smk_py_script(config, "par-geds-tcm-pulser")}' "--log {log} " f"--configs {ro(configs)} " "--datatype {params.datatype} " From 16d03a2e798480e21cb314effacab17cc2f438a1 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 4 Feb 2025 23:37:23 +0100 Subject: [PATCH 094/101] replace underscores 
 with dashes in cmdline options
---
 workflow/rules/ann.smk                        |   8 +-
 workflow/rules/blinding_calibration.smk       |   4 +-
 workflow/rules/blinding_check.smk             |   4 +-
 workflow/rules/chanlist_gen.smk               |   4 +-
 workflow/rules/channel_merge.smk              |   4 +-
 workflow/rules/dsp.smk                        |   4 +-
 workflow/rules/dsp_pars_geds.smk              |  56 ++++----
 workflow/rules/evt.smk                        |  24 ++--
 workflow/rules/hit.smk                        |   4 +-
 workflow/rules/hit_pars_geds.smk              |  50 +++----
 workflow/rules/pht.smk                        |   4 +-
 workflow/rules/pht_pars_geds.smk              | 122 +++++++++---------
 workflow/rules/pht_pars_geds_fast.smk         |  28 ++--
 workflow/rules/psp.smk                        |   4 +-
 workflow/rules/psp_pars_geds.smk              |  28 ++--
 workflow/rules/qc_phy.smk                     |  12 +-
 workflow/rules/raw.smk                        |   8 +-
 workflow/rules/skm.smk                        |   2 +-
 workflow/rules/tcm.smk                        |   4 +-
 .../scripts/create_chankeylist.py             |   4 +-
 .../legenddataflow/scripts/merge_channels.py  |   4 +-
 .../scripts/par/geds/dsp/dplms.py             |  10 +-
 .../scripts/par/geds/dsp/eopt.py              |   8 +-
 .../scripts/par/geds/dsp/evtsel.py            |  10 +-
 .../scripts/par/geds/dsp/nopt.py              |   6 +-
 .../scripts/par/geds/dsp/svm.py               |   4 +-
 .../scripts/par/geds/dsp/svm_build.py         |   6 +-
 .../scripts/par/geds/dsp/tau.py               |  10 +-
 .../scripts/par/geds/hit/aoe.py               |  14 +-
 .../scripts/par/geds/hit/ecal.py              |  16 +--
 .../legenddataflow/scripts/par/geds/hit/lq.py |  14 +-
 .../legenddataflow/scripts/par/geds/hit/qc.py |  14 +-
 .../scripts/par/geds/pht/aoe.py               |  16 +--
 .../scripts/par/geds/pht/ecal_part.py         |  16 +--
 .../scripts/par/geds/pht/fast.py              |  16 +--
 .../legenddataflow/scripts/par/geds/pht/lq.py |  16 +--
 .../legenddataflow/scripts/par/geds/pht/qc.py |  14 +-
 .../scripts/par/geds/pht/qc_phy.py            |   6 +-
 .../scripts/par/geds/psp/average.py           |   8 +-
 .../scripts/par/geds/raw/blindcal.py          |   4 +-
 .../scripts/par/geds/raw/blindcheck.py        |   4 +-
 .../scripts/par/geds/tcm/pulser.py            |   4 +-
 .../src/legenddataflow/scripts/tier/dsp.py    |   4 +-
 .../src/legenddataflow/scripts/tier/evt.py    |  12 +-
 .../src/legenddataflow/scripts/tier/hit.py    |   4 +-
 .../legenddataflow/scripts/tier/raw_blind.py  |   4 +-
 .../legenddataflow/scripts/tier/raw_fcio.py   |   2 +-
 .../legenddataflow/scripts/tier/raw_orca.py   |   2 +-
 .../src/legenddataflow/scripts/tier/skm.py    |   2 +-
 49 files changed, 314 insertions(+), 314 deletions(-)

diff --git a/workflow/rules/ann.smk b/workflow/rules/ann.smk
index b1f5edf..5cdd016 100644
--- a/workflow/rules/ann.smk
+++ b/workflow/rules/ann.smk
@@ -39,8 +39,8 @@ rule build_ann:
         "--timestamp {params.timestamp} "
         "--input {input.dsp_file} "
         "--output {output.tier_file} "
-        "--db_file {output.db_file} "
-        "--pars_file {input.pars_file} "
+        "--db-file {output.db_file} "
+        "--pars-file {input.pars_file} "


 rule build_pan:
@@ -70,5 +70,5 @@ rule build_pan:
         "--timestamp {params.timestamp} "
         "--input {input.dsp_file} "
         "--output {output.tier_file} "
-        "--db_file {output.db_file} "
-        "--pars_file {input.pars_file} "
+        "--db-file {output.db_file} "
+        "--pars-file {input.pars_file} "
diff --git a/workflow/rules/blinding_calibration.smk b/workflow/rules/blinding_calibration.smk
index fce7b11..31e71a8 100644
--- a/workflow/rules/blinding_calibration.smk
+++ b/workflow/rules/blinding_calibration.smk
@@ -45,8 +45,8 @@ rule build_blinding_calibration:
         "--channel {params.channel} "
         "--configs {configs} "
         "--meta {params.meta} "
-        "--plot_file {output.plot_file} "
-        "--blind_curve {output.par_file} "
+        "--plot-file {output.plot_file} "
+        "--blind-curve {output.par_file} "
         "--files {input.files} "


diff --git a/workflow/rules/blinding_check.smk b/workflow/rules/blinding_check.smk
index b5ec5b4..2bee385 100644
--- a/workflow/rules/blinding_check.smk
+++
b/workflow/rules/blinding_check.smk @@ -47,8 +47,8 @@ rule build_blinding_check: "--configs {configs} " "--metadata {meta} " "--output {output.check_file} " - "--blind_curve {input.par_file} " - "--plot_file {output.plot_file} " + "--blind-curve {input.par_file} " + "--plot-file {output.plot_file} " "--files {input.files} " diff --git a/workflow/rules/chanlist_gen.smk b/workflow/rules/chanlist_gen.smk index abee65a..750104b 100644 --- a/workflow/rules/chanlist_gen.smk +++ b/workflow/rules/chanlist_gen.smk @@ -24,8 +24,8 @@ def get_chanlist(setup, keypart, workflow, config, det_status, chan_maps): ) cmd = execenv_smk_py_script(config, "create-chankeylist") - cmd += f" --det_status {det_status} --channelmap {chan_maps} --timestamp {key.timestamp} " - cmd += f"--datatype cal --output_file {output_file}" + cmd += f" --det-status {det_status} --channelmap {chan_maps} --timestamp {key.timestamp} " + cmd += f"--datatype cal --output-file {output_file}" os.system(cmd) with open(output_file) as r: diff --git a/workflow/rules/channel_merge.smk b/workflow/rules/channel_merge.smk index 8ba185d..b221fc3 100644 --- a/workflow/rules/channel_merge.smk +++ b/workflow/rules/channel_merge.smk @@ -152,8 +152,8 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): ) if lh5_merge is True: shell_string += ( - "--in_db {input.in_db} " - "--out_db {output.out_db} " + "--in-db {input.in_db} " + "--out-db {output.out_db} " ) shell(shell_string) diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index 9acf3ae..20c5d38 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -61,5 +61,5 @@ rule build_dsp: "--timestamp {params.timestamp} " "--input {params.ro_input[raw_file]} " "--output {output.tier_file} " - "--db_file {output.db_file} " - "--pars_file {params.ro_input[pars_file]} " + "--db-file {output.db_file} " + "--pars-file {params.ro_input[pars_file]} " diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index 2dc6d47..8d1f075 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -42,10 +42,10 @@ rule build_pars_dsp_tau_geds: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--plot_path {output.plots} " - "--output_file {output.decay_const} " - "--pulser_file {input.pulser} " - "--raw_files {input.files}" + "--plot-path {output.plots} " + "--output-file {output.decay_const} " + "--pulser-file {input.pulser} " + "--raw-files {input.files}" rule build_pars_evtsel_geds: @@ -76,11 +76,11 @@ rule build_pars_evtsel_geds: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--peak_file {output.peak_file} " - "--pulser_file {input.pulser_file} " - "--decay_const {input.database} " - "--raw_cal {input.raw_cal} " - "--raw_filelist {input.files}" + "--peak-file {output.peak_file} " + "--pulser-file {input.pulser_file} " + "--decay-const {input.database} " + "--raw-cal {input.raw_cal} " + "--raw-filelist {input.files}" # This rule builds the optimal energy filter parameters for the dsp using fft files @@ -115,9 +115,9 @@ rule build_pars_dsp_nopt_geds: "--timestamp {params.timestamp} " "--channel {params.channel} " "--inplots {input.inplots} " - "--plot_path {output.plots} " - "--dsp_pars {output.dsp_pars_nopt} " - "--raw_filelist {input.files}" + "--plot-path {output.plots} " + "--dsp-pars {output.dsp_pars_nopt} " + "--raw-filelist {input.files}" # This rule builds the dplms energy filter for the dsp using fft and cal files @@ -145,8 
+145,8 @@ rule build_pars_dsp_dplms_geds: runtime=300, shell: f'{execenv_smk_py_script(config, "par-geds-dsp-dplms")}' - "--fft_raw_filelist {input.fft_files} " - "--peak_file {input.peak_file} " + "--fft-raw-filelist {input.fft_files} " + "--peak-file {input.peak_file} " "--database {input.database} " "--inplots {input.inplots} " "--configs {configs} " @@ -154,9 +154,9 @@ rule build_pars_dsp_dplms_geds: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--dsp_pars {output.dsp_pars} " - "--lh5_path {output.lh5_path} " - "--plot_path {output.plots} " + "--dsp-pars {output.dsp_pars} " + "--lh5-path {output.lh5_path} " + "--plot-path {output.plots} " # This rule builds the optimal energy filter parameters for the dsp using calibration dsp files @@ -188,12 +188,12 @@ rule build_pars_dsp_eopt_geds: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--peak_file {input.peak_file} " + "--peak-file {input.peak_file} " "--inplots {input.inplots} " - "--decay_const {input.decay_const} " - "--plot_path {output.plots} " - "--qbb_grid_path {output.qbb_grid} " - "--final_dsp_pars {output.dsp_pars}" + "--decay-const {input.decay_const} " + "--plot-path {output.plots} " + "--qbb-grid-path {output.qbb_grid} " + "--final-dsp-pars {output.dsp_pars}" rule build_svm_dsp_geds: @@ -215,9 +215,9 @@ rule build_svm_dsp_geds: shell: f'{execenv_smk_py_script(config, "par-geds-dsp-svm-build")}' "--log {log} " - "--train_data {input.train_data} " - "--train_hyperpars {input.hyperpars} " - "--output_file {output.dsp_pars}" + "--train-data {input.train_data} " + "--train-hyperpars {input.hyperpars} " + "--output-file {output.dsp_pars}" rule build_pars_dsp_svm_geds: @@ -235,6 +235,6 @@ rule build_pars_dsp_svm_geds: shell: f'{execenv_smk_py_script(config, "par-geds-dsp-svm")}' "--log {log} " - "--input_file {input.dsp_pars} " - "--output_file {output.dsp_pars} " - "--svm_file {input.svm_file}" + "--input-file {input.dsp_pars} " + "--output-file {output.dsp_pars} " + "--svm-file {input.svm_file}" diff --git a/workflow/rules/evt.smk b/workflow/rules/evt.smk index 260fc31..cc72249 100644 --- a/workflow/rules/evt.smk +++ b/workflow/rules/evt.smk @@ -51,15 +51,15 @@ rule build_evt: "--tier {params.tier} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--xtc_file {params.ro_input[xtalk_matrix]} " - "--par_files {params.ro_input[par_files]} " - "--hit_file {params.ro_input[hit_file]} " - "--tcm_file {params.ro_input[tcm_file]} " - "--dsp_file {params.ro_input[dsp_file]} " + "--xtc-file {params.ro_input[xtalk_matrix]} " + "--par-files {params.ro_input[par_files]} " + "--hit-file {params.ro_input[hit_file]} " + "--tcm-file {params.ro_input[tcm_file]} " + "--dsp-file {params.ro_input[dsp_file]} " "--output {output} " ) if input.ann_file is not None: - shell_string += "--ann_file {params.ro_input[ann_file]} " + shell_string += "--ann-file {params.ro_input[ann_file]} " shell(shell_string) @@ -103,15 +103,15 @@ rule build_pet: "--tier {params.tier} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--xtc_file {params.ro_input[xtalk_matrix]} " - "--par_files {params.ro_input[par_files]} " - "--hit_file {params.ro_input[hit_file]} " - "--tcm_file {params.ro_input[tcm_file]} " - "--dsp_file {params.ro_input[dsp_file]} " + "--xtc-file {params.ro_input[xtalk_matrix]} " + "--par-files {params.ro_input[par_files]} " + "--hit-file {params.ro_input[hit_file]} " + "--tcm-file {params.ro_input[tcm_file]} " + 
"--dsp-file {params.ro_input[dsp_file]} " "--output {output} " ) if input.ann_file is not None: - shell_string += "--ann_file {params.ro_input[ann_file]} " + shell_string += "--ann-file {params.ro_input[ann_file]} " shell(shell_string) diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index a6cf3c0..469b0f5 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -57,7 +57,7 @@ rule build_hit: "--tier {params.tier} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--pars_file {params.ro_input[pars_file]} " + "--pars-file {params.ro_input[pars_file]} " "--output {output.tier_file} " "--input {params.ro_input[dsp_file]} " - "--db_file {output.db_file}" + "--db-file {output.db_file}" diff --git a/workflow/rules/hit_pars_geds.smk b/workflow/rules/hit_pars_geds.smk index 7db1fcc..0b0aef6 100644 --- a/workflow/rules/hit_pars_geds.smk +++ b/workflow/rules/hit_pars_geds.smk @@ -53,12 +53,12 @@ rule build_qc: "--channel {params.channel} " "--configs {configs} " "--metadata {meta} " - "--plot_path {output.plot_file} " - "--save_path {output.qc_file} " - "--pulser_file {input.pulser} " - "--cal_files {input.files} " - "--fft_files {input.fft_files} " - "--overwrite_files {input.overwrite_files} " + "--plot-path {output.plot_file} " + "--save-path {output.qc_file} " + "--pulser-file {input.pulser} " + "--cal-files {input.files} " + "--fft-files {input.fft_files} " + "--overwrite-files {input.overwrite_files} " # This rule builds the energy calibration using the calibration dsp files @@ -101,13 +101,13 @@ rule build_energy_calibration: "--channel {params.channel} " "--configs {configs} " "--metadata {meta} " - "--plot_path {output.plot_file} " - "--results_path {output.results_file} " - "--save_path {output.ecal_file} " - "--inplot_dict {input.inplots} " - "--in_hit_dict {input.in_hit_dict} " - "--ctc_dict {input.ctc_dict} " - "--pulser_file {input.pulser} " + "--plot-path {output.plot_file} " + "--results-path {output.results_file} " + "--save-path {output.ecal_file} " + "--inplot-dict {input.inplots} " + "--in-hit-dict {input.in_hit_dict} " + "--ctc-dict {input.ctc_dict} " + "--pulser-file {input.pulser} " "--files {input.files}" @@ -150,12 +150,12 @@ rule build_aoe_calibration: "--timestamp {params.timestamp} " "--inplots {input.inplots} " "--channel {params.channel} " - "--aoe_results {output.aoe_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--pulser_file {input.pulser} " - "--ecal_file {input.ecal_file} " + "--aoe-results {output.aoe_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--pulser-file {input.pulser} " + "--ecal-file {input.ecal_file} " "{input.files}" @@ -196,10 +196,10 @@ rule build_lq_calibration: "--timestamp {params.timestamp} " "--inplots {input.inplots} " "--channel {params.channel} " - "--lq_results {output.lq_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--pulser_file {input.pulser} " - "--ecal_file {input.ecal_file} " + "--lq-results {output.lq_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--pulser-file {input.pulser} " + "--ecal-file {input.ecal_file} " "{input.files}" diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index 402ab8d..447cee0 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -60,7 +60,7 @@ rule build_pht: 
"--tier {params.tier} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--pars_file {params.ro_input[pars_file]} " + "--pars-file {params.ro_input[pars_file]} " "--output {output.tier_file} " "--input {params.ro_input[dsp_file]} " - "--db_file {output.db_file}" + "--db-file {output.db_file}" diff --git a/workflow/rules/pht_pars_geds.smk b/workflow/rules/pht_pars_geds.smk index 50b6972..cec57b5 100644 --- a/workflow/rules/pht_pars_geds.smk +++ b/workflow/rules/pht_pars_geds.smk @@ -108,12 +108,12 @@ for key, dataset in part.datasets.items(): "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--save_path {output.hit_pars} " - "--plot_path {output.plot_file} " - "--overwrite_files {input.overwrite_files} " - "--pulser_files {input.pulser_files} " - "--fft_files {input.fft_files} " - "--cal_files {input.cal_files}" + "--save-path {output.hit_pars} " + "--plot-path {output.plot_file} " + "--overwrite-files {input.overwrite_files} " + "--pulser-files {input.pulser_files} " + "--fft-files {input.fft_files} " + "--cal-files {input.cal_files}" set_last_rule_name(workflow, f"{key}-{partition}-build_pht_qc") @@ -159,12 +159,12 @@ rule build_pht_qc: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--save_path {output.hit_pars} " - "--plot_path {output.plot_file} " - "--overwrite_files {input.overwrite_files} " - "--pulser_files {input.pulser_files} " - "--fft_files {input.fft_files} " - "--cal_files {input.cal_files}" + "--save-path {output.hit_pars} " + "--plot-path {output.plot_file} " + "--overwrite-files {input.overwrite_files} " + "--pulser-files {input.pulser_files} " + "--fft-files {input.fft_files} " + "--cal-files {input.cal_files}" fallback_qc_rule = list(workflow.rules)[-1] @@ -221,13 +221,13 @@ rule build_per_energy_calibration: "--configs {configs} " "--tier {params.tier} " "--metadata {meta} " - "--plot_path {output.plot_file} " - "--results_path {output.results_file} " - "--save_path {output.ecal_file} " - "--inplot_dict {input.inplots} " - "--in_hit_dict {input.pht_dict} " - "--ctc_dict {input.ctc_dict} " - "--pulser_file {input.pulser} " + "--plot-path {output.plot_file} " + "--results-path {output.results_file} " + "--save-path {output.ecal_file} " + "--inplot-dict {input.inplots} " + "--in-hit-dict {input.pht_dict} " + "--ctc-dict {input.ctc_dict} " + "--pulser-file {input.pulser} " "--files {input.files}" @@ -333,13 +333,13 @@ for key, dataset in part.datasets.items(): "--inplots {input.inplots} " "--channel {params.channel} " "--metadata {meta} " - "--fit_results {output.partcal_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" + "--fit-results {output.partcal_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--ecal-file {input.ecal_file} " + "--pulser-files {input.pulser_files} " + "--input-files {input.files}" set_last_rule_name( workflow, f"{key}-{partition}-build_pht_energy_super_calibrations" @@ -393,13 +393,13 @@ rule build_pht_energy_super_calibrations: "--channel {params.channel} " "--metadata {meta} " "--inplots {input.inplots} " - "--fit_results {output.partcal_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - 
"--pulser_files {input.pulser_files} " - "--input_files {input.files}" + "--fit-results {output.partcal_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--ecal-file {input.ecal_file} " + "--pulser-files {input.pulser_files} " + "--input-files {input.files}" fallback_pht_rule = list(workflow.rules)[-1] @@ -514,13 +514,13 @@ for key, dataset in part.datasets.items(): "--timestamp {params.timestamp} " "--inplots {input.inplots} " "--channel {params.channel} " - "--aoe_results {output.aoe_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" + "--aoe-results {output.aoe_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--ecal-file {input.ecal_file} " + "--pulser-files {input.pulser_files} " + "--input-files {input.files}" set_last_rule_name( workflow, f"{key}-{partition}-build_pht_aoe_calibrations" @@ -574,13 +574,13 @@ rule build_pht_aoe_calibrations: "--timestamp {params.timestamp} " "--inplots {input.inplots} " "--channel {params.channel} " - "--aoe_results {output.aoe_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" + "--aoe-results {output.aoe_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--ecal-file {input.ecal_file} " + "--pulser-files {input.pulser_files} " + "--input-files {input.files}" fallback_pht_rule = list(workflow.rules)[-1] @@ -693,13 +693,13 @@ for key, dataset in part.datasets.items(): "--timestamp {params.timestamp} " "--inplots {input.inplots} " "--channel {params.channel} " - "--lq_results {output.lq_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" + "--lq-results {output.lq_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--ecal-file {input.ecal_file} " + "--pulser-files {input.pulser_files} " + "--input-files {input.files}" set_last_rule_name(workflow, f"{key}-{partition}-build_pht_lq_calibration") @@ -748,13 +748,13 @@ rule build_pht_lq_calibration: "--timestamp {params.timestamp} " "--inplots {input.inplots} " "--channel {params.channel} " - "--lq_results {output.lq_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" + "--lq-results {output.lq_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--ecal-file {input.ecal_file} " + "--pulser-files {input.pulser_files} " + "--input-files {input.files}" fallback_pht_rule = list(workflow.rules)[-1] diff --git a/workflow/rules/pht_pars_geds_fast.smk b/workflow/rules/pht_pars_geds_fast.smk index 26aca7e..c6e0232 100644 --- a/workflow/rules/pht_pars_geds_fast.smk +++ b/workflow/rules/pht_pars_geds_fast.smk @@ -115,13 +115,13 @@ for key, dataset in part.datasets.items(): "--inplots {input.inplots} " 
"--channel {params.channel} " "--metadata {meta} " - "--fit_results {output.partcal_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" + "--fit-results {output.partcal_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--ecal-file {input.ecal_file} " + "--pulser-files {input.pulser_files} " + "--input-files {input.files}" set_last_rule_name(workflow, f"{key}-{partition}-par_pht_fast") slow_rule = workflow._rules[f"{key}-{partition}-build_pht_lq_calibration"] @@ -173,13 +173,13 @@ rule par_pht_fast: "--channel {params.channel} " "--metadata {meta} " "--inplots {input.inplots} " - "--fit_results {output.partcal_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" + "--fit-results {output.partcal_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--ecal-file {input.ecal_file} " + "--pulser-files {input.pulser_files} " + "--input-files {input.files}" fallback_pht_rule = list(workflow.rules)[-1] diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index 1f6d36f..7cceea1 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -61,5 +61,5 @@ rule build_psp: "--timestamp {params.timestamp} " "--input {params.ro_input[raw_file]} " "--output {output.tier_file} " - "--db_file {output.db_file} " - "--pars_file {params.ro_input[pars_file]} " + "--db-file {output.db_file} " + "--pars-file {params.ro_input[pars_file]} " diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk index 8f6ee77..37c0836 100644 --- a/workflow/rules/psp_pars_geds.smk +++ b/workflow/rules/psp_pars_geds.smk @@ -103,10 +103,10 @@ for key, dataset in part.datasets.items(): "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--in_plots {input.dsp_plots} " - "--out_plots {output.psp_plots} " - "--in_obj {input.dsp_objs} " - "--out_obj {output.psp_objs} " + "--in-plots {input.dsp_plots} " + "--out-plots {output.psp_plots} " + "--in-obj {input.dsp_objs} " + "--out-obj {output.psp_objs} " "--input {input.dsp_pars} " "--output {output.psp_pars} " @@ -148,10 +148,10 @@ rule build_par_psp_fallback: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--in_plots {input.dsp_plots} " - "--out_plots {output.psp_plots} " - "--in_obj {input.dsp_objs} " - "--out_obj {output.psp_objs} " + "--in-plots {input.dsp_plots} " + "--out-plots {output.psp_plots} " + "--in-obj {input.dsp_objs} " + "--out-obj {output.psp_objs} " "--input {input.dsp_pars} " "--output {output.psp_pars} " @@ -185,9 +185,9 @@ rule build_svm_psp: shell: f'{execenv_smk_py_script(config, "par-geds-dsp-svm-build")}' "--log {log} " - "--train_data {input.train_data} " - "--train_hyperpars {input.hyperpars} " - "--output_file {output.dsp_pars}" + "--train-data {input.train_data} " + "--train-hyperpars {input.hyperpars} " + "--output-file {output.dsp_pars}" rule build_pars_psp_svm: @@ -205,6 +205,6 @@ rule build_pars_psp_svm: shell: f'{execenv_smk_py_script(config, "par-geds-dsp-svm")}' "--log {log} " - "--input_file {input.dsp_pars} " - "--output_file {output.dsp_pars} " - 
"--svm_file {input.svm_model}" + "--input-file {input.dsp_pars} " + "--output-file {output.dsp_pars} " + "--svm-file {input.svm_model}" diff --git a/workflow/rules/qc_phy.smk b/workflow/rules/qc_phy.smk index 8d6250e..7ee105f 100644 --- a/workflow/rules/qc_phy.smk +++ b/workflow/rules/qc_phy.smk @@ -73,9 +73,9 @@ for key, dataset in part.datasets.items(): "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--save_path {output.hit_pars} " - "--plot_path {output.plot_file} " - "--phy_files {input.phy_files}" + "--save-path {output.hit_pars} " + "--plot-path {output.plot_file} " + "--phy-files {input.phy_files}" set_last_rule_name(workflow, f"{key}-{partition}-build_pht_qc_phy") @@ -114,9 +114,9 @@ rule build_pht_qc_phy: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--save_path {output.hit_pars} " - "--plot_path {output.plot_file} " - "--phy_files {input.phy_files}" + "--save-path {output.hit_pars} " + "--plot-path {output.plot_file} " + "--phy-files {input.phy_files}" fallback_qc_rule = list(workflow.rules)[-1] diff --git a/workflow/rules/raw.smk b/workflow/rules/raw.smk index 2411c14..9353826 100644 --- a/workflow/rules/raw.smk +++ b/workflow/rules/raw.smk @@ -43,7 +43,7 @@ rule build_raw_orca: f'{execenv_smk_py_script(config, "build-tier-raw-orca")}' "--log {log} " f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " + f"--chan-maps {ro(chan_maps)} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "{params.ro_input} {output}" @@ -72,7 +72,7 @@ rule build_raw_fcio: f'{execenv_smk_py_script(config, "build-tier-raw-fcio")}' "--log {log} " f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " + f"--chan-maps {ro(chan_maps)} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "{params.ro_input} {output}" @@ -107,10 +107,10 @@ rule build_raw_blind: f'{execenv_smk_py_script(config, "build-tier-raw-blind")}' "--log {log} " f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " + f"--chan-maps {ro(chan_maps)} " f"--metadata {ro(meta)} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--blind_curve {params.ro_input[blind_file]} " + "--blind-curve {params.ro_input[blind_file]} " "--input {params.ro_input[tier_file]} " "--output {output}" diff --git a/workflow/rules/skm.smk b/workflow/rules/skm.smk index 3f38c3b..a2dc119 100644 --- a/workflow/rules/skm.smk +++ b/workflow/rules/skm.smk @@ -32,5 +32,5 @@ rule build_skm: "--timestamp {params.timestamp} " "--log {log} " "--datatype {params.datatype} " - "--evt_file {params.ro_input} " + "--evt-file {params.ro_input} " "--output {output} " diff --git a/workflow/rules/tcm.smk b/workflow/rules/tcm.smk index b954bf3..ff4e89a 100644 --- a/workflow/rules/tcm.smk +++ b/workflow/rules/tcm.smk @@ -63,6 +63,6 @@ rule build_pulser_ids: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--tcm_files {params.input} " - "--pulser_file {output.pulser} " + "--tcm-files {params.input} " + "--pulser-file {output.pulser} " "--metadata {meta} " diff --git a/workflow/src/legenddataflow/scripts/create_chankeylist.py b/workflow/src/legenddataflow/scripts/create_chankeylist.py index 9566068..710b6cc 100644 --- a/workflow/src/legenddataflow/scripts/create_chankeylist.py +++ b/workflow/src/legenddataflow/scripts/create_chankeylist.py @@ -7,12 +7,12 @@ def create_chankeylist() -> None: argparser = argparse.ArgumentParser() - 
argparser.add_argument("--det_status", help="det_status", type=str, required=True) + argparser.add_argument("--det-status", help="det_status", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channelmap", help="Channel Map", type=str, required=True) - argparser.add_argument("--output_file", help="output_file", type=str, required=True) + argparser.add_argument("--output-file", help="output_file", type=str, required=True) args = argparser.parse_args() det_status = TextDB(args.det_status, lazy=True) diff --git a/workflow/src/legenddataflow/scripts/merge_channels.py b/workflow/src/legenddataflow/scripts/merge_channels.py index 6fee6f5..4fe1d28 100644 --- a/workflow/src/legenddataflow/scripts/merge_channels.py +++ b/workflow/src/legenddataflow/scripts/merge_channels.py @@ -31,13 +31,13 @@ def merge_channels() -> None: ) argparser.add_argument("--output", help="output file", type=str, required=True) argparser.add_argument( - "--in_db", + "--in-db", help="in db file (used for when lh5 files referred to in db)", type=str, required=False, ) argparser.add_argument( - "--out_db", + "--out-db", help="lh5 file (used for when lh5 files referred to in db)", type=str, required=False, diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py index 2b0004b..16343dc 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py @@ -17,8 +17,8 @@ def par_geds_dsp_dplms() -> None: argparser = argparse.ArgumentParser() - argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) - argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) + argparser.add_argument("--fft-raw-filelist", help="fft_raw_filelist", type=str) + argparser.add_argument("--peak-file", help="tcm_filelist", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) argparser.add_argument("--database", help="database", type=str, required=True) @@ -30,9 +30,9 @@ def par_geds_dsp_dplms() -> None: argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) - argparser.add_argument("--lh5_path", help="lh5_path", type=str, required=True) - argparser.add_argument("--plot_path", help="plot_path", type=str) + argparser.add_argument("--dsp-pars", help="dsp_pars", type=str, required=True) + argparser.add_argument("--lh5-path", help="lh5_path", type=str, required=True) + argparser.add_argument("--plot-path", help="plot_path", type=str) args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py index 4b755c2..6376ed5 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py @@ -28,8 +28,8 @@ def par_geds_dsp_eopt() -> None: argparser = argparse.ArgumentParser() - argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) - argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) + argparser.add_argument("--peak-file", help="tcm_filelist", type=str, required=True) + 
argparser.add_argument("--decay-const", help="decay_const", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) argparser.add_argument("--log", help="log_file", type=str) @@ -41,13 +41,13 @@ def par_geds_dsp_eopt() -> None: argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument( - "--final_dsp_pars", help="final_dsp_pars", type=str, required=True + "--final-dsp-pars", help="final_dsp_pars", type=str, required=True ) argparser.add_argument("--qbb_grid_path", help="qbb_grid_path", type=str) argparser.add_argument("--plot_path", help="plot_path", type=str) argparser.add_argument( - "--plot_save_path", help="plot_save_path", type=str, required=False + "--plot-save-path", help="plot_save_path", type=str, required=False ) args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py index e9b1de6..afd4a0b 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py @@ -82,17 +82,17 @@ def get_out_data( def par_geds_dsp_evtsel() -> None: argparser = argparse.ArgumentParser() - argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) + argparser.add_argument("--raw-filelist", help="raw_filelist", type=str) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, required=False + "--tcm-filelist", help="tcm_filelist", type=str, required=False ) argparser.add_argument( - "--pulser_file", help="pulser_file", type=str, required=False + "--pulser-file", help="pulser_file", type=str, required=False ) argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) argparser.add_argument( - "--raw_cal", help="raw_cal", type=str, nargs="*", required=True + "--raw-cal", help="raw_cal", type=str, nargs="*", required=True ) argparser.add_argument("--log", help="log_file", type=str) @@ -103,7 +103,7 @@ def par_geds_dsp_evtsel() -> None: argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--peak_file", help="peak_file", type=str, required=True) + argparser.add_argument("--peak-file", help="peak_file", type=str, required=True) args = argparser.parse_args() configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py index 691a0e8..d720446 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py @@ -19,7 +19,7 @@ def par_geds_dsp_nopt() -> None: sto = lh5.LH5Store() argparser = argparse.ArgumentParser() - argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) + argparser.add_argument("--raw-filelist", help="raw_filelist", type=str) argparser.add_argument("--database", help="database", type=str, required=True) argparser.add_argument("--inplots", help="inplots", type=str) @@ -31,8 +31,8 @@ def par_geds_dsp_nopt() -> None: argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) - argparser.add_argument("--plot_path", help="plot_path", type=str) + 
argparser.add_argument("--dsp-pars", help="dsp_pars", type=str, required=True) + argparser.add_argument("--plot-path", help="plot_path", type=str) args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py index d4a1e22..268ca86 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py @@ -8,10 +8,10 @@ def par_geds_dsp_svm() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) argparser.add_argument( - "--output_file", help="output par file", type=str, required=True + "--output-file", help="output par file", type=str, required=True ) argparser.add_argument( - "--input_file", help="input par file", type=str, required=True + "--input-file", help="input par file", type=str, required=True ) argparser.add_argument("--svm_file", help="svm file", required=True) args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py index 162ccfa..6ae5764 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py @@ -19,13 +19,13 @@ def par_geds_dsp_svm_build() -> None: argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument( - "--output_file", help="output SVM file", type=str, required=True + "--output-file", help="output SVM file", type=str, required=True ) argparser.add_argument( - "--train_data", help="input data file", type=str, required=True + "--train-data", help="input data file", type=str, required=True ) argparser.add_argument( - "--train_hyperpars", help="input hyperparameter file", required=True + "--train-hyperpars", help="input hyperparameter file", required=True ) args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py index 4d493a1..a86e531 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py @@ -24,16 +24,16 @@ def par_geds_dsp_tau() -> None: argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--plot_path", help="plot path", type=str, required=False) - argparser.add_argument("--output_file", help="output file", type=str, required=True) + argparser.add_argument("--plot-path", help="plot path", type=str, required=False) + argparser.add_argument("--output-file", help="output file", type=str, required=True) argparser.add_argument( - "--pulser_file", help="pulser file", type=str, required=False + "--pulser-file", help="pulser file", type=str, required=False ) - argparser.add_argument("--raw_files", help="input files", nargs="*", type=str) + argparser.add_argument("--raw-files", help="input files", nargs="*", type=str) argparser.add_argument( - "--tcm_files", help="tcm_files", nargs="*", type=str, required=False + "--tcm-files", help="tcm_files", nargs="*", type=str, required=False ) args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py b/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py index 2b6c6e1..74ece8e 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py +++ 
b/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py @@ -50,14 +50,14 @@ def par_geds_hit_aoe() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("files", help="files", nargs="*", type=str) argparser.add_argument( - "--pulser_file", help="pulser_file", type=str, required=False + "--pulser-file", help="pulser_file", type=str, required=False ) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, required=False + "--tcm-filelist", help="tcm_filelist", type=str, required=False ) - argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) - argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) + argparser.add_argument("--ecal-file", help="ecal_file", type=str, required=True) + argparser.add_argument("--eres-file", help="eres_file", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) argparser.add_argument("--configs", help="configs", type=str, required=True) @@ -68,9 +68,9 @@ def par_geds_hit_aoe() -> None: argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) - argparser.add_argument("--hit_pars", help="hit_pars", type=str) - argparser.add_argument("--aoe_results", help="aoe_results", type=str) + argparser.add_argument("--plot-file", help="plot_file", type=str, required=False) + argparser.add_argument("--hit-pars", help="hit_pars", type=str) + argparser.add_argument("--aoe-results", help="aoe_results", type=str) argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py b/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py index c67e304..c763433 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py @@ -439,15 +439,15 @@ def par_geds_hit_ecal() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="filelist", nargs="*", type=str) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, required=False + "--tcm-filelist", help="tcm_filelist", type=str, required=False ) argparser.add_argument( - "--pulser_file", help="pulser_file", type=str, required=False + "--pulser-file", help="pulser_file", type=str, required=False ) - argparser.add_argument("--ctc_dict", help="ctc_dict", nargs="*") - argparser.add_argument("--in_hit_dict", help="in_hit_dict", required=False) - argparser.add_argument("--inplot_dict", help="inplot_dict", required=False) + argparser.add_argument("--ctc-dict", help="ctc_dict", nargs="*") + argparser.add_argument("--in-hit-dict", help="in_hit_dict", required=False) + argparser.add_argument("--inplot-dict", help="inplot_dict", required=False) argparser.add_argument("--configs", help="config", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -459,9 +459,9 @@ def par_geds_hit_ecal() -> None: argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) - argparser.add_argument("--save_path", help="save_path", type=str) - argparser.add_argument("--results_path", help="results_path", type=str) + argparser.add_argument("--plot-path", help="plot_path", type=str, 
required=False) + argparser.add_argument("--save-path", help="save_path", type=str) + argparser.add_argument("--results-path", help="results_path", type=str) argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py b/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py index 357fe33..b4dc3f2 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py @@ -49,14 +49,14 @@ def par_geds_hit_lq() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("files", help="files", nargs="*", type=str) argparser.add_argument( - "--pulser_file", help="pulser_file", type=str, required=False + "--pulser-file", help="pulser_file", type=str, required=False ) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, required=False + "--tcm-filelist", help="tcm_filelist", type=str, required=False ) - argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) - argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) + argparser.add_argument("--ecal-file", help="ecal_file", type=str, required=True) + argparser.add_argument("--eres-file", help="eres_file", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) argparser.add_argument("--configs", help="configs", type=str, required=True) @@ -67,9 +67,9 @@ def par_geds_hit_lq() -> None: argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) - argparser.add_argument("--hit_pars", help="hit_pars", type=str) - argparser.add_argument("--lq_results", help="lq_results", type=str) + argparser.add_argument("--plot-file", help="plot_file", type=str, required=False) + argparser.add_argument("--hit-pars", help="hit_pars", type=str) + argparser.add_argument("--lq-results", help="lq_results", type=str) argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py b/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py index acc1a32..33934c4 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py @@ -27,17 +27,17 @@ def par_geds_hit_qc() -> None: argparser = argparse.ArgumentParser() - argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) - argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument("--cal-files", help="cal_files", nargs="*", type=str) + argparser.add_argument("--fft-files", help="fft_files", nargs="*", type=str) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, required=False + "--tcm-filelist", help="tcm_filelist", type=str, required=False ) argparser.add_argument( - "--pulser_file", help="pulser_file", type=str, required=False + "--pulser-file", help="pulser_file", type=str, required=False ) argparser.add_argument( - "--overwrite_files", + "--overwrite-files", help="overwrite_files", type=str, required=False, @@ -53,8 +53,8 @@ def par_geds_hit_qc() -> None: argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument("--tier", help="tier", 
type=str, default="hit") - argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) - argparser.add_argument("--save_path", help="save_path", type=str) + argparser.add_argument("--plot-path", help="plot_path", type=str, required=False) + argparser.add_argument("--save-path", help="save_path", type=str) args = argparser.parse_args() configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py b/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py index 12c70f8..76383ef 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py @@ -224,19 +224,19 @@ def eres_func(x): def par_geds_pht_aoe() -> None: argparser = argparse.ArgumentParser() argparser.add_argument( - "--input_files", help="files", type=str, nargs="*", required=True + "--input-files", help="files", type=str, nargs="*", required=True ) argparser.add_argument( - "--pulser_files", help="pulser_file", nargs="*", type=str, required=False + "--pulser-files", help="pulser_file", nargs="*", type=str, required=False ) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False + "--tcm-filelist", help="tcm_filelist", type=str, nargs="*", required=False ) argparser.add_argument( - "--ecal_file", help="ecal_file", type=str, nargs="*", required=True + "--ecal-file", help="ecal_file", type=str, nargs="*", required=True ) argparser.add_argument( - "--eres_file", help="eres_file", type=str, nargs="*", required=True + "--eres-file", help="eres_file", type=str, nargs="*", required=True ) argparser.add_argument( "--inplots", help="eres_file", type=str, nargs="*", required=True @@ -251,10 +251,10 @@ def par_geds_pht_aoe() -> None: argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument( - "--plot_file", help="plot_file", type=str, nargs="*", required=False + "--plot-file", help="plot_file", type=str, nargs="*", required=False ) - argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) - argparser.add_argument("--aoe_results", help="aoe_results", nargs="*", type=str) + argparser.add_argument("--hit-pars", help="hit_pars", nargs="*", type=str) + argparser.add_argument("--aoe-results", help="aoe_results", nargs="*", type=str) argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py b/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py index 560a063..6d9babf 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py @@ -424,19 +424,19 @@ def calibrate_partition( if __name__ == "__main__": argparser = argparse.ArgumentParser() argparser.add_argument( - "--input_files", help="files", type=str, nargs="*", required=True + "--input-files", help="files", type=str, nargs="*", required=True ) argparser.add_argument( - "--pulser_files", help="pulser_file", nargs="*", type=str, required=False + "--pulser-files", help="pulser_file", nargs="*", type=str, required=False ) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False + "--tcm-filelist", help="tcm_filelist", type=str, nargs="*", required=False ) argparser.add_argument( - "--ecal_file", help="ecal_file", type=str, nargs="*", required=True + "--ecal-file", 
help="ecal_file", type=str, nargs="*", required=True ) argparser.add_argument( - "--eres_file", help="eres_file", type=str, nargs="*", required=True + "--eres-file", help="eres_file", type=str, nargs="*", required=True ) argparser.add_argument( "--inplots", help="eres_file", type=str, nargs="*", required=True @@ -451,10 +451,10 @@ def calibrate_partition( argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument( - "--plot_file", help="plot_file", type=str, nargs="*", required=False + "--plot-file", help="plot_file", type=str, nargs="*", required=False ) - argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) - argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + argparser.add_argument("--hit-pars", help="hit_pars", nargs="*", type=str) + argparser.add_argument("--fit-results", help="fit_results", nargs="*", type=str) argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py b/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py index 0faa42d..c3089e1 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py @@ -45,19 +45,19 @@ def run_splitter(files): def par_geds_pht_fast() -> None: argparser = argparse.ArgumentParser() argparser.add_argument( - "--input_files", help="files", type=str, nargs="*", required=True + "--input-files", help="files", type=str, nargs="*", required=True ) argparser.add_argument( - "--pulser_files", help="pulser_file", nargs="*", type=str, required=False + "--pulser-files", help="pulser_file", nargs="*", type=str, required=False ) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False + "--tcm-filelist", help="tcm_filelist", type=str, nargs="*", required=False ) argparser.add_argument( - "--ecal_file", help="ecal_file", type=str, nargs="*", required=True + "--ecal-file", help="ecal_file", type=str, nargs="*", required=True ) argparser.add_argument( - "--eres_file", help="eres_file", type=str, nargs="*", required=True + "--eres-file", help="eres_file", type=str, nargs="*", required=True ) argparser.add_argument( "--inplots", help="eres_file", type=str, nargs="*", required=True @@ -72,10 +72,10 @@ def par_geds_pht_fast() -> None: argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument( - "--plot_file", help="plot_file", type=str, nargs="*", required=False + "--plot-file", help="plot_file", type=str, nargs="*", required=False ) - argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) - argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + argparser.add_argument("--hit-pars", help="hit_pars", nargs="*", type=str) + argparser.add_argument("--fit-results", help="fit_results", nargs="*", type=str) argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py b/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py index 78c8c6e..f46914c 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py @@ -241,19 +241,19 @@ def eres_func(x): if __name__ == "__main__": argparser = argparse.ArgumentParser() argparser.add_argument( - "--input_files", help="files", type=str, nargs="*", required=True + 
"--input-files", help="files", type=str, nargs="*", required=True ) argparser.add_argument( - "--pulser_files", help="pulser_file", type=str, nargs="*", required=False + "--pulser-files", help="pulser_file", type=str, nargs="*", required=False ) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False + "--tcm-filelist", help="tcm_filelist", type=str, nargs="*", required=False ) argparser.add_argument( - "--ecal_file", help="ecal_file", type=str, nargs="*", required=True + "--ecal-file", help="ecal_file", type=str, nargs="*", required=True ) argparser.add_argument( - "--eres_file", help="eres_file", type=str, nargs="*", required=True + "--eres-file", help="eres_file", type=str, nargs="*", required=True ) argparser.add_argument( "--inplots", help="eres_file", type=str, nargs="*", required=True @@ -268,10 +268,10 @@ def eres_func(x): argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument( - "--plot_file", help="plot_file", type=str, nargs="*", required=False + "--plot-file", help="plot_file", type=str, nargs="*", required=False ) - argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) - argparser.add_argument("--lq_results", help="lq_results", nargs="*", type=str) + argparser.add_argument("--hit-pars", help="hit_pars", nargs="*", type=str) + argparser.add_argument("--lq-results", help="lq_results", nargs="*", type=str) argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py b/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py index af6dc95..c6ac3df 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py @@ -27,17 +27,17 @@ if __name__ == "__main__": argparser = argparse.ArgumentParser() - argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) - argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument("--cal-files", help="cal_files", nargs="*", type=str) + argparser.add_argument("--fft-files", help="fft_files", nargs="*", type=str) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", nargs="*", type=str, required=False + "--tcm-filelist", help="tcm_filelist", nargs="*", type=str, required=False ) argparser.add_argument( - "--pulser_files", help="pulser_file", nargs="*", type=str, required=False + "--pulser-files", help="pulser_file", nargs="*", type=str, required=False ) argparser.add_argument( - "--overwrite_files", help="overwrite_files", nargs="*", type=str, required=False + "--overwrite-files", help="overwrite_files", nargs="*", type=str, required=False ) argparser.add_argument("--configs", help="config", type=str, required=True) @@ -49,10 +49,10 @@ argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument( - "--plot_path", help="plot_path", type=str, nargs="*", required=False + "--plot-path", help="plot_path", type=str, nargs="*", required=False ) argparser.add_argument( - "--save_path", + "--save-path", help="save_path", type=str, nargs="*", diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py b/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py index 38f5e20..9007ad7 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py @@ -26,7 +26,7 @@ if __name__ == 
"__main__": argparser = argparse.ArgumentParser() - argparser.add_argument("--phy_files", help="cal_files", nargs="*", type=str) + argparser.add_argument("--phy-files", help="cal_files", nargs="*", type=str) argparser.add_argument("--configs", help="config", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) @@ -37,10 +37,10 @@ argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument( - "--plot_path", help="plot_path", type=str, nargs="*", required=False + "--plot-path", help="plot_path", type=str, nargs="*", required=False ) argparser.add_argument( - "--save_path", + "--save-path", help="save_path", type=str, nargs="*", diff --git a/workflow/src/legenddataflow/scripts/par/geds/psp/average.py b/workflow/src/legenddataflow/scripts/par/geds/psp/average.py index 65508a2..3ba1423 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/psp/average.py +++ b/workflow/src/legenddataflow/scripts/par/geds/psp/average.py @@ -24,16 +24,16 @@ def par_geds_psp_average() -> None: "--output", help="output file", nargs="*", type=str, required=True ) argparser.add_argument( - "--in_plots", help="input plot files", nargs="*", type=str, required=False + "--in-plots", help="input plot files", nargs="*", type=str, required=False ) argparser.add_argument( - "--out_plots", help="output plot files", nargs="*", type=str, required=False + "--out-plots", help="output plot files", nargs="*", type=str, required=False ) argparser.add_argument( - "--in_obj", help="input object files", nargs="*", type=str, required=False + "--in-obj", help="input object files", nargs="*", type=str, required=False ) argparser.add_argument( - "--out_obj", help="output object files", nargs="*", type=str, required=False + "--out-obj", help="output object files", nargs="*", type=str, required=False ) argparser.add_argument("--log", help="log_file", type=str) diff --git a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py index a937458..eeaaf2b 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py @@ -24,8 +24,8 @@ def par_geds_raw_blindcal() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="files", nargs="*", type=str) - argparser.add_argument("--blind_curve", help="blind_curve", type=str) - argparser.add_argument("--plot_file", help="out plot path", type=str) + argparser.add_argument("--blind-curve", help="blind_curve", type=str) + argparser.add_argument("--plot-file", help="out plot path", type=str) argparser.add_argument("--meta", help="meta", type=str) argparser.add_argument("--configs", help="configs", type=str) diff --git a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py index 7f645c1..5f60c54 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py @@ -30,9 +30,9 @@ def par_geds_raw_blindcheck() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="files", nargs="*", type=str) argparser.add_argument("--output", help="output file", type=str) - argparser.add_argument("--plot_file", help="plot file", type=str) + argparser.add_argument("--plot-file", help="plot file", type=str) argparser.add_argument( - "--blind_curve", help="blinding curves file", 
nargs="*", type=str + "--blind-curve", help="blinding curves file", nargs="*", type=str ) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) diff --git a/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py b/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py index ab5f400..4c75d62 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py +++ b/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py @@ -21,10 +21,10 @@ def par_geds_tcm_pulser() -> None: argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument( - "--pulser_file", help="pulser file", type=str, required=False + "--pulser-file", help="pulser file", type=str, required=False ) - argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) + argparser.add_argument("--tcm-files", help="tcm_files", nargs="*", type=str) args = argparser.parse_args() configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) diff --git a/workflow/src/legenddataflow/scripts/tier/dsp.py b/workflow/src/legenddataflow/scripts/tier/dsp.py index 906985b..33d8f9c 100644 --- a/workflow/src/legenddataflow/scripts/tier/dsp.py +++ b/workflow/src/legenddataflow/scripts/tier/dsp.py @@ -38,12 +38,12 @@ def build_tier_dsp() -> None: argparser.add_argument("--tier", help="Tier", type=str, required=True) argparser.add_argument( - "--pars_file", help="database file for detector", nargs="*", default=[] + "--pars-file", help="database file for detector", nargs="*", default=[] ) argparser.add_argument("--input", help="input file", type=str) argparser.add_argument("--output", help="output file", type=str) - argparser.add_argument("--db_file", help="db file", type=str) + argparser.add_argument("--db-file", help="db file", type=str) args = argparser.parse_args() configs = TextDB(args.configs, lazy=True) diff --git a/workflow/src/legenddataflow/scripts/tier/evt.py b/workflow/src/legenddataflow/scripts/tier/evt.py index 15a76d1..195fbd6 100644 --- a/workflow/src/legenddataflow/scripts/tier/evt.py +++ b/workflow/src/legenddataflow/scripts/tier/evt.py @@ -33,12 +33,12 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): def build_tier_evt() -> None: argparser = argparse.ArgumentParser() - argparser.add_argument("--hit_file", help="hit file", type=str) - argparser.add_argument("--dsp_file", help="dsp file", type=str) - argparser.add_argument("--tcm_file", help="tcm file", type=str) - argparser.add_argument("--ann_file", help="ann file") - argparser.add_argument("--xtc_file", help="xtc file", type=str) - argparser.add_argument("--par_files", help="par files", nargs="*") + argparser.add_argument("--hit-file", help="hit file", type=str) + argparser.add_argument("--dsp-file", help="dsp file", type=str) + argparser.add_argument("--tcm-file", help="tcm file", type=str) + argparser.add_argument("--ann-file", help="ann file") + argparser.add_argument("--xtc-file", help="xtc file", type=str) + argparser.add_argument("--par-files", help="par files", nargs="*") argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) diff --git a/workflow/src/legenddataflow/scripts/tier/hit.py b/workflow/src/legenddataflow/scripts/tier/hit.py index 9fd489f..ffaf25b 100644 --- a/workflow/src/legenddataflow/scripts/tier/hit.py +++ 
b/workflow/src/legenddataflow/scripts/tier/hit.py @@ -13,7 +13,7 @@ def build_tier_hit() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) - argparser.add_argument("--pars_file", help="hit pars file", nargs="*") + argparser.add_argument("--pars-file", help="hit pars file", nargs="*") argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata", type=str, required=True) @@ -24,7 +24,7 @@ def build_tier_hit() -> None: argparser.add_argument("--tier", help="Tier", type=str, required=True) argparser.add_argument("--output", help="output file", type=str) - argparser.add_argument("--db_file", help="db file", type=str) + argparser.add_argument("--db-file", help="db file", type=str) args = argparser.parse_args() configs = TextDB(args.configs, lazy=True) diff --git a/workflow/src/legenddataflow/scripts/tier/raw_blind.py b/workflow/src/legenddataflow/scripts/tier/raw_blind.py index 19eb023..8fa827a 100644 --- a/workflow/src/legenddataflow/scripts/tier/raw_blind.py +++ b/workflow/src/legenddataflow/scripts/tier/raw_blind.py @@ -27,12 +27,12 @@ def build_tier_raw_blind() -> None: argparser.add_argument("--input", help="input file", type=str) argparser.add_argument("--output", help="output file", type=str) argparser.add_argument( - "--blind_curve", help="blinding curves file", type=str, required=True, nargs="*" + "--blind-curve", help="blinding curves file", type=str, required=True, nargs="*" ) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--configs", help="config file", type=str) - argparser.add_argument("--chan_maps", help="chan map", type=str) + argparser.add_argument("--chan-maps", help="chan map", type=str) argparser.add_argument("--metadata", help="metadata", type=str) argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/tier/raw_fcio.py b/workflow/src/legenddataflow/scripts/tier/raw_fcio.py index fefc8a1..c52f441 100644 --- a/workflow/src/legenddataflow/scripts/tier/raw_fcio.py +++ b/workflow/src/legenddataflow/scripts/tier/raw_fcio.py @@ -17,7 +17,7 @@ def build_tier_raw_fcio() -> None: argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--configs", help="config file", type=str) - argparser.add_argument("--chan_maps", help="chan map", type=str) + argparser.add_argument("--chan-maps", help="chan map", type=str) argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/tier/raw_orca.py b/workflow/src/legenddataflow/scripts/tier/raw_orca.py index 00d7751..ca6a9f3 100644 --- a/workflow/src/legenddataflow/scripts/tier/raw_orca.py +++ b/workflow/src/legenddataflow/scripts/tier/raw_orca.py @@ -17,7 +17,7 @@ def build_tier_raw_orca() -> None: argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--configs", help="config file", type=str) - argparser.add_argument("--chan_maps", help="chan map", type=str) + argparser.add_argument("--chan-maps", help="chan map", type=str) argparser.add_argument("--log", help="log file") args 
= argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/tier/skm.py b/workflow/src/legenddataflow/scripts/tier/skm.py index a698629..f194a00 100644 --- a/workflow/src/legenddataflow/scripts/tier/skm.py +++ b/workflow/src/legenddataflow/scripts/tier/skm.py @@ -23,7 +23,7 @@ def get_all_out_fields(input_table, out_fields, current_field=""): def build_tier_skm() -> None: argparser = argparse.ArgumentParser() - argparser.add_argument("--evt_file", help="evt file", required=True) + argparser.add_argument("--evt-file", help="evt file", required=True) argparser.add_argument("--configs", help="configs", required=True) argparser.add_argument("--datatype", help="datatype", required=True) argparser.add_argument("--timestamp", help="timestamp", required=True) From ccfcbce091ed0bdde6605d1b445ae3360c3cb72f Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 5 Feb 2025 12:25:04 +0100 Subject: [PATCH 095/101] rename execenv_smk_py_script to execenv_pyexe --- workflow/rules/ann.smk | 12 +++++----- workflow/rules/blinding_calibration.smk | 11 ++++------ workflow/rules/blinding_check.smk | 10 ++++----- workflow/rules/chanlist_gen.smk | 11 +++++----- workflow/rules/channel_merge.smk | 10 ++++----- workflow/rules/dsp.smk | 5 ++--- workflow/rules/dsp_pars_geds.smk | 23 +++++++------------- workflow/rules/evt.smk | 11 ++++------ workflow/rules/hit.smk | 5 ++--- workflow/rules/hit_pars_geds.smk | 14 +++++------- workflow/rules/pht.smk | 5 ++--- workflow/rules/pht_pars_geds.smk | 29 +++++++++---------------- workflow/rules/pht_pars_geds_fast.smk | 8 +++---- workflow/rules/psp.smk | 5 ++--- workflow/rules/psp_pars_geds.smk | 14 +++++------- workflow/rules/qc_phy.smk | 14 +++++------- workflow/rules/raw.smk | 11 ++++------ workflow/rules/skm.smk | 5 ++--- workflow/rules/tcm.smk | 8 +++---- workflow/src/legenddataflow/__init__.py | 4 ++-- workflow/src/legenddataflow/execenv.py | 13 +++++------ 21 files changed, 88 insertions(+), 140 deletions(-) diff --git a/workflow/rules/ann.smk b/workflow/rules/ann.smk index 5cdd016..1e48623 100644 --- a/workflow/rules/ann.smk +++ b/workflow/rules/ann.smk @@ -9,7 +9,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe rule build_ann: @@ -30,11 +30,10 @@ rule build_ann: runtime=300, mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: - f'{execenv_smk_py_script(config, "build-tier-dsp")}' - "--log {log} " + execenv_pyexe(config, "build-tier-dsp") + "--log {log} " "--configs {configs} " "--metadata {meta} " - f"--tier ann " + "--tier ann " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--input {input.dsp_file} " @@ -61,11 +60,10 @@ rule build_pan: runtime=300, mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: - f'{execenv_smk_py_script(config, "build-tier-dsp")}' - "--log {log} " + execenv_pyexe(config, "build-tier-dsp") + "--log {log} " "--configs {configs} " "--metadata {meta} " - f"--tier pan " + "--tier pan " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--input {input.dsp_file} " diff --git a/workflow/rules/blinding_calibration.smk b/workflow/rules/blinding_calibration.smk index 31e71a8..a4dcc1e 100644 --- a/workflow/rules/blinding_calibration.smk +++ b/workflow/rules/blinding_calibration.smk @@ -12,7 +12,7 @@ from legenddataflow.patterns import ( get_pattern_log_channel, ) from pathlib import Path -from legenddataflow.execenv 
import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe rule build_blinding_calibration: @@ -38,8 +38,7 @@ rule build_blinding_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-raw-blindcal")}' - "--log {log} " + execenv_pyexe(config, "par-geds-raw-blindcal") + "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -66,8 +65,7 @@ rule build_plts_blinding: group: "merge-blindcal" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' - "--input {input} " + execenv_pyexe(config, "merge-channels") + "--input {input} " "--output {output} " @@ -88,6 +86,5 @@ rule build_pars_blinding: group: "merge-blindcal" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' - "--input {input.infiles} " + execenv_pyexe(config, "merge-channels") + "--input {input.infiles} " "--output {output} " diff --git a/workflow/rules/blinding_check.smk b/workflow/rules/blinding_check.smk index 2bee385..0822d9d 100644 --- a/workflow/rules/blinding_check.smk +++ b/workflow/rules/blinding_check.smk @@ -12,7 +12,7 @@ from legenddataflow.patterns import ( get_pattern_plts, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe from pathlib import Path @@ -39,8 +39,7 @@ rule build_blinding_check: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-raw-blindcheck")}' - "--log {log} " + execenv_pyexe(config, "par-geds-raw-blindcheck") + "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -67,8 +66,7 @@ rule build_plts_raw: group: "merge-raw" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' - "--input {input} " + execenv_pyexe(config, "merge-channels") + "--input {input} " "--output {output} " @@ -91,4 +89,4 @@ rule build_pars_raw: group: "merge-raw" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' + execenv_pyexe(config, "merge-channels") diff --git a/workflow/rules/chanlist_gen.smk b/workflow/rules/chanlist_gen.smk index 750104b..0f30cc0 100644 --- a/workflow/rules/chanlist_gen.smk +++ b/workflow/rules/chanlist_gen.smk @@ -9,7 +9,7 @@ from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, ) -from legenddataflow import execenv_smk_py_script +from legenddataflow import execenv_pyexe from legenddataflow.utils import filelist_path @@ -23,10 +23,11 @@ def get_chanlist(setup, keypart, workflow, config, det_status, chan_maps): f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = execenv_smk_py_script(config, "create-chankeylist") - cmd += f" --det-status {det_status} --channelmap {chan_maps} --timestamp {key.timestamp} " - cmd += f"--datatype cal --output-file {output_file}" - os.system(cmd) + os.system( + execenv_pyexe(config, "create-chankeylist") + + "--det-status {det_status} --channelmap {chan_maps} --timestamp {key.timestamp} " + "--datatype cal --output-file {output_file}" + ) with open(output_file) as r: chan_list = r.read().splitlines() diff --git a/workflow/rules/channel_merge.smk b/workflow/rules/channel_merge.smk index b221fc3..42b06c9 100644 --- a/workflow/rules/channel_merge.smk +++ b/workflow/rules/channel_merge.smk @@ -8,7 +8,7 @@ from legenddataflow.patterns import ( ) from legenddataflow.utils import set_last_rule_name import inspect -from legenddataflow.execenv import 
execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): if lh5_tier is None: @@ -31,7 +31,7 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): group: f"merge-{tier}" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' + execenv_pyexe(config, "merge-channels") + \ "--input {input} " "--output {output} " "--channelmap {meta} " @@ -64,7 +64,7 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): group: f"merge-{tier}" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' + execenv_pyexe(config, "merge-channels") + \ "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -97,7 +97,7 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): group: f"merge-{tier}" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' + execenv_pyexe(config, "merge-channels") + \ "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -144,7 +144,7 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): f"merge-{tier}" run: shell_string = ( - f'{execenv_smk_py_script(config, "merge-channels")}' + execenv_pyexe(config, "merge-channels") + \ "--output {output.out_file} " "--input {input.in_files} " "--timestamp {params.timestamp} " diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index 20c5d38..f9a9299 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -14,7 +14,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe dsp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -52,8 +52,7 @@ rule build_dsp: runtime=300, mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, shell: - f'{execenv_smk_py_script(config, "build-tier-dsp")}' - "--log {log} " + execenv_pyexe(config, "build-tier-dsp") + "--log {log} " "--tier dsp " f"--configs {ro(configs)} " "--metadata {meta} " diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index 8d1f075..52fae7c 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -13,7 +13,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe rule build_pars_dsp_tau_geds: @@ -36,8 +36,7 @@ rule build_pars_dsp_tau_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-tau")}' - "--configs {configs} " + execenv_pyexe(config, "par-geds-dsp-tau") + "--configs {configs} " "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -70,8 +69,7 @@ rule build_pars_evtsel_geds: runtime=300, mem_swap=70, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-evtsel")}' - "--configs {configs} " + execenv_pyexe(config, "par-geds-dsp-evtsel") + "--configs {configs} " "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -107,8 +105,7 @@ rule build_pars_dsp_nopt_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-nopt")}' - "--database {input.database} " + execenv_pyexe(config, "par-geds-dsp-nopt") + "--database {input.database} " "--configs {configs} " "--log {log} " "--datatype {params.datatype} " @@ -144,9 +141,8 @@ rule build_pars_dsp_dplms_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-dplms")}' + 
execenv_pyexe(config, "par-geds-dsp-dplms") + "--peak-file {input.peak_file} " "--fft-raw-filelist {input.fft_files} " - "--peak-file {input.peak_file} " "--database {input.database} " "--inplots {input.inplots} " "--configs {configs} " @@ -182,8 +178,7 @@ rule build_pars_dsp_eopt_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-eopt")}' - "--log {log} " + execenv_pyexe(config, "par-geds-dsp-eopt") + "--log {log} " "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -213,8 +208,7 @@ rule build_svm_dsp_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-svm-build")}' - "--log {log} " + execenv_pyexe(config, "par-geds-dsp-svm-build") + "--log {log} " "--train-data {input.train_data} " "--train-hyperpars {input.hyperpars} " "--output-file {output.dsp_pars}" @@ -233,8 +227,7 @@ rule build_pars_dsp_svm_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-svm")}' - "--log {log} " + execenv_pyexe(config, "par-geds-dsp-svm") + "--log {log} " "--input-file {input.dsp_pars} " "--output-file {output.dsp_pars} " "--svm-file {input.svm_file}" diff --git a/workflow/rules/evt.smk b/workflow/rules/evt.smk index cc72249..6a8147e 100644 --- a/workflow/rules/evt.smk +++ b/workflow/rules/evt.smk @@ -9,7 +9,7 @@ from legenddataflow.patterns import ( get_pattern_pars, get_pattern_log_concat, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe rule build_evt: @@ -44,8 +44,7 @@ rule build_evt: mem_swap=50, run: shell_string = ( - f'{execenv_smk_py_script(config, "build-tier-evt")}' - f"--configs {ro(configs)} " + execenv_pyexe(config, "build-tier-evt") + f"--configs {ro(configs)} " f"--metadata {ro(meta)} " "--log {log} " "--tier {params.tier} " @@ -96,8 +95,7 @@ rule build_pet: mem_swap=50, run: shell_string = ( - f'{execenv_smk_py_script(config, "build-tier-evt")}' - f"--configs {ro(configs)} " + execenv_pyexe(config, "build-tier-evt") + f"--configs {ro(configs)} " f"--metadata {ro(meta)} " "--log {log} " "--tier {params.tier} " @@ -144,8 +142,7 @@ for evt_tier in ("evt", "pet"): group: "tier-evt" shell: - f'{execenv_smk_py_script(config, "lh5concat")}' - "--verbose --overwrite " + execenv_pyexe(config, "lh5concat") + "--verbose --overwrite " "--output {output} " "-- {params.ro_input} &> {log}" diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index 469b0f5..3d4926c 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -14,7 +14,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars_tmp, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe hit_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -50,8 +50,7 @@ rule build_hit: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "build-tier-hit")}' - f"--configs {ro(configs)} " + execenv_pyexe(config, "build-tier-hit") + f"--configs {ro(configs)} " "--metadata {meta} " "--log {log} " "--tier {params.tier} " diff --git a/workflow/rules/hit_pars_geds.smk b/workflow/rules/hit_pars_geds.smk index 0b0aef6..b60b88c 100644 --- a/workflow/rules/hit_pars_geds.smk +++ b/workflow/rules/hit_pars_geds.smk @@ -18,7 +18,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe # This rule builds the qc using the calibration 
dsp files and fft files @@ -46,8 +46,7 @@ rule build_qc: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-hit-qc")}' - "--log {log} " + execenv_pyexe(config, "par-geds-hit-qc") + "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -94,8 +93,7 @@ rule build_energy_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-hit-ecal")}' - "--log {log} " + execenv_pyexe(config, "par-geds-hit-ecal") + "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -142,8 +140,7 @@ rule build_aoe_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-hit-aoe")}' - "--log {log} " + execenv_pyexe(config, "par-geds-hit-aoe") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " @@ -188,8 +185,7 @@ rule build_lq_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-hit-lq")}' - "--log {log} " + execenv_pyexe(config, "par-geds-hit-lq") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index 447cee0..1f75c4f 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -15,7 +15,7 @@ from legenddataflow.patterns import ( get_pattern_pars_tmp, get_pattern_log, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe pht_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -53,8 +53,7 @@ rule build_pht: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "build-tier-hit")}' - f"--configs {ro(configs)} " + execenv_pyexe(config, "build-tier-hit") + f"--configs {ro(configs)} " "--metadata {meta} " "--log {log} " "--tier {params.tier} " diff --git a/workflow/rules/pht_pars_geds.smk b/workflow/rules/pht_pars_geds.smk index cec57b5..306a46c 100644 --- a/workflow/rules/pht_pars_geds.smk +++ b/workflow/rules/pht_pars_geds.smk @@ -20,7 +20,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe pht_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -101,8 +101,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 30, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-qc")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-qc") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " @@ -152,8 +151,7 @@ rule build_pht_qc: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-qc")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-qc") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " @@ -213,8 +211,7 @@ rule build_per_energy_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-hit-ecal")}' - "--log {log} " + execenv_pyexe(config, "par-geds-hit-ecal") + "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -325,8 +322,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-ecal-part")}' - "--log {log} " + 
execenv_pyexe(config, "par-geds-pht-ecal-part") + "--log {log} " "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -385,8 +381,7 @@ rule build_pht_energy_super_calibrations: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-ecal-part")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-ecal-part") + "--log {log} " "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -506,8 +501,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-aoe")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-aoe") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " @@ -566,8 +560,7 @@ rule build_pht_aoe_calibrations: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-aoe")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-aoe") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " @@ -685,8 +678,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-lq")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-lq") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " @@ -740,8 +732,7 @@ rule build_pht_lq_calibration: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-lq")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-lq") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " diff --git a/workflow/rules/pht_pars_geds_fast.smk b/workflow/rules/pht_pars_geds_fast.smk index c6e0232..9f4d7b4 100644 --- a/workflow/rules/pht_pars_geds_fast.smk +++ b/workflow/rules/pht_pars_geds_fast.smk @@ -11,7 +11,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe pht_fast_rules = {} @@ -106,8 +106,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 12, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-fast")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-fast") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " @@ -164,8 +163,7 @@ rule par_pht_fast: mem_swap=50, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-fast")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-fast") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index 7cceea1..3dc8c3f 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -14,7 +14,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -52,8 +52,7 @@ rule build_psp: runtime=300, mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, shell: - f'{execenv_smk_py_script(config, "build-tier-dsp")}' - "--log {log} " + execenv_pyexe(config, "build-tier-dsp") + "--log {log} " "--tier psp " 
f"--configs {ro(configs)} " "--metadata {meta} " diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk index 37c0836..db4fa35 100644 --- a/workflow/rules/psp_pars_geds.smk +++ b/workflow/rules/psp_pars_geds.smk @@ -14,7 +14,7 @@ from legenddataflow.patterns import ( get_pattern_pars, get_pattern_tier, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -97,8 +97,7 @@ for key, dataset in part.datasets.items(): resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-psp-average")}' - "--log {log} " + execenv_pyexe(config, "par-geds-psp-average") + "--log {log} " "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -142,8 +141,7 @@ rule build_par_psp_fallback: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-psp-average")}' - "--log {log} " + execenv_pyexe(config, "par-geds-psp-average") + "--log {log} " "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -183,8 +181,7 @@ rule build_svm_psp: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-svm-build")}' - "--log {log} " + execenv_pyexe(config, "par-geds-dsp-svm-build") + "--log {log} " "--train-data {input.train_data} " "--train-hyperpars {input.hyperpars} " "--output-file {output.dsp_pars}" @@ -203,8 +200,7 @@ rule build_pars_psp_svm: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-svm")}' - "--log {log} " + execenv_pyexe(config, "par-geds-dsp-svm") + "--log {log} " "--input-file {input.dsp_pars} " "--output-file {output.dsp_pars} " "--svm-file {input.svm_model}" diff --git a/workflow/rules/qc_phy.smk b/workflow/rules/qc_phy.smk index 7ee105f..aaea3c0 100644 --- a/workflow/rules/qc_phy.smk +++ b/workflow/rules/qc_phy.smk @@ -11,7 +11,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe intier = "psp" @@ -67,8 +67,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 20, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-qc-phy")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-qc-phy") + "--log {log} " "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -108,8 +107,7 @@ rule build_pht_qc_phy: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-qc-phy")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-qc-phy") + "--log {log} " "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -146,8 +144,7 @@ rule build_plts_pht_phy: group: "merge-hit" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' - "--input {input} " + execenv_pyexe(config, "merge-channels") + "--input {input} " "--output {output} " @@ -168,6 +165,5 @@ rule build_pars_pht_phy: group: "merge-hit" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' - "--input {input.infiles} " + execenv_pyexe(config, "merge-channels") + "--input {input.infiles} " "--output {output} " diff --git a/workflow/rules/raw.smk b/workflow/rules/raw.smk index 9353826..9b60a06 100644 --- a/workflow/rules/raw.smk +++ b/workflow/rules/raw.smk @@ -7,7 +7,7 @@ from legenddataflow.patterns import ( ) from legenddataflow.utils import 
set_last_rule_name from legenddataflow.create_pars_keylist import ParsKeyResolve -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe raw_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -40,8 +40,7 @@ rule build_raw_orca: mem_swap=110, runtime=300, shell: - f'{execenv_smk_py_script(config, "build-tier-raw-orca")}' - "--log {log} " + execenv_pyexe(config, "build-tier-raw-orca") + "--log {log} " f"--configs {ro(configs)} " f"--chan-maps {ro(chan_maps)} " "--datatype {params.datatype} " @@ -69,8 +68,7 @@ rule build_raw_fcio: mem_swap=110, runtime=300, shell: - f'{execenv_smk_py_script(config, "build-tier-raw-fcio")}' - "--log {log} " + execenv_pyexe(config, "build-tier-raw-fcio") + "--log {log} " f"--configs {ro(configs)} " f"--chan-maps {ro(chan_maps)} " "--datatype {params.datatype} " @@ -104,8 +102,7 @@ rule build_raw_blind: mem_swap=110, runtime=300, shell: - f'{execenv_smk_py_script(config, "build-tier-raw-blind")}' - "--log {log} " + execenv_pyexe(config, "build-tier-raw-blind") + "--log {log} " f"--configs {ro(configs)} " f"--chan-maps {ro(chan_maps)} " f"--metadata {ro(meta)} " diff --git a/workflow/rules/skm.smk b/workflow/rules/skm.smk index a2dc119..d4f040b 100644 --- a/workflow/rules/skm.smk +++ b/workflow/rules/skm.smk @@ -8,7 +8,7 @@ from legenddataflow.patterns import ( get_pattern_pars, get_pattern_log_concat, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe rule build_skm: @@ -27,8 +27,7 @@ rule build_skm: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "build-tier-skm")}' - f"--configs {ro(configs)} " + execenv_pyexe(config, "build-tier-skm") + f"--configs {ro(configs)} " "--timestamp {params.timestamp} " "--log {log} " "--datatype {params.datatype} " diff --git a/workflow/rules/tcm.smk b/workflow/rules/tcm.smk index ff4e89a..f4e7b2c 100644 --- a/workflow/rules/tcm.smk +++ b/workflow/rules/tcm.smk @@ -8,7 +8,7 @@ from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_log_channel, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe # This rule builds the tcm files each raw file @@ -29,8 +29,7 @@ rule build_tier_tcm: runtime=300, mem_swap=20, shell: - f'{execenv_smk_py_script(config, "build-tier-tcm")}' - "--log {log} " + execenv_pyexe(config, "build-tier-tcm") + "--log {log} " f"--configs {ro(configs)} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -57,8 +56,7 @@ rule build_pulser_ids: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-tcm-pulser")}' - "--log {log} " + execenv_pyexe(config, "par-geds-tcm-pulser") + "--log {log} " f"--configs {ro(configs)} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " diff --git a/workflow/src/legenddataflow/__init__.py b/workflow/src/legenddataflow/__init__.py index ca8ddbb..5392601 100644 --- a/workflow/src/legenddataflow/__init__.py +++ b/workflow/src/legenddataflow/__init__.py @@ -2,8 +2,8 @@ from .create_pars_keylist import ParsKeyResolve from .execenv import ( execenv_prefix, + execenv_pyexe, execenv_python, - execenv_smk_py_script, ) from .FileKey import ChannelProcKey, FileKey, ProcessingFileKey from .pars_loading import ParsCatalog @@ -21,8 +21,8 @@ "ParsKeyResolve", "ProcessingFileKey", "execenv_prefix", + "execenv_pyexe", "execenv_python", - "execenv_smk_py_script", "subst_vars", "subst_vars_in_snakemake_config", "unix_time", diff 
--git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index 9fd2ac0..a34e805 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -60,18 +60,15 @@ def execenv_python(config, aslist=False): return " ".join(cmdline), cmdenv -def execenv_smk_py_script(config, scriptname, aslist=False): - """Returns the command used to run a Python script for a Snakemake rule. +def execenv_pyexe(config, exename): + """Returns the command used to run a legend-dataflow executable for a Snakemake rule. - For example: `apptainer run image.sif python path/to/script.py` + For example: `apptainer run image.sif path/to/bindir/` """ - config = AttrsDict(config) - cmdline, _ = execenv_prefix(config, aslist=True) - cmdline.append(f"{config.paths.install}/bin/{scriptname} ") + # NOTE: space after the executable name + cmdline.append(f"{config.paths.install}/bin/{exename} ") - if aslist: - return cmdline return " ".join(cmdline) From 401814af092c08560e820ea10484b92e5913167d Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 5 Feb 2025 23:25:34 +0100 Subject: [PATCH 096/101] improve execenv code and add tests --- tests/test_execenv.py | 91 ++++++++++++++++ workflow/src/legenddataflow/__init__.py | 2 - workflow/src/legenddataflow/execenv.py | 136 +++++++++++++++--------- 3 files changed, 175 insertions(+), 54 deletions(-) create mode 100644 tests/test_execenv.py diff --git a/tests/test_execenv.py b/tests/test_execenv.py new file mode 100644 index 0000000..4aa354d --- /dev/null +++ b/tests/test_execenv.py @@ -0,0 +1,91 @@ +import os + +import pytest +from dbetto import AttrsDict +from legenddataflow import execenv + +os.environ["XDG_RUNTIME_DIR"] = "whatever" + + +@pytest.fixture(scope="module") +def config(): + return AttrsDict( + { + "paths": {"install": ".snakemake/software"}, + "execenv": { + "cmd": "apptainer exec", + "arg": "image.sif", + "env": { + "VAR1": "val1", + "VAR2": "val2", + }, + }, + } + ) + + +def test_execenv2str(): + assert ( + execenv._execenv2str(["cmd", "-v", "opt"], {"VAR1": "val1", "VAR2": "val2"}) + == "VAR1=val1 VAR2=val2 cmd -v opt" + ) + + +def test_execenv_prefix(config): + cmd_expr, cmd_env = execenv.execenv_prefix(config, as_string=False) + + assert cmd_expr == [ + "apptainer", + "exec", + "--env=VAR1=val1", + "--env=VAR2=val2", + "--bind=whatever", + "image.sif", + ] + assert cmd_env == config.execenv.env + + config.execenv.cmd = "docker run" + cmd_expr, cmd_env = execenv.execenv_prefix(config, as_string=False) + + assert cmd_expr == [ + "docker", + "run", + "--env=VAR1=val1", + "--env=VAR2=val2", + "--volume=whatever:whatever", + "image.sif", + ] + assert cmd_env == config.execenv.env + + config.execenv.cmd = "shifter" + config.execenv.arg = "--image=legendexp/legend-base:latest" + cmd_expr, cmd_env = execenv.execenv_prefix(config, as_string=False) + + assert cmd_expr == [ + "shifter", + "--env=VAR1=val1", + "--env=VAR2=val2", + "--volume=whatever:whatever", + "--image=legendexp/legend-base:latest", + ] + assert cmd_env == config.execenv.env + + cmd_str = execenv.execenv_prefix(config, as_string=True) + assert cmd_str == ( + "VAR1=val1 VAR2=val2 " + "shifter --env=VAR1=val1 --env=VAR2=val2 " + "--volume=whatever:whatever " + "--image=legendexp/legend-base:latest " + ) + + +def test_execenv_pyexe(config): + cmd_str = execenv.execenv_pyexe(config, "dio-boe") + + assert cmd_str == ( + "VAR1=val1 VAR2=val2 " + "shifter --env=VAR1=val1 --env=VAR2=val2 " + "--volume=whatever:whatever " + 
"--image=legendexp/legend-base:latest " + ".snakemake/software/bin/dio-boe " + ) diff --git a/workflow/src/legenddataflow/__init__.py b/workflow/src/legenddataflow/__init__.py index 5392601..a8ba884 100644 --- a/workflow/src/legenddataflow/__init__.py +++ b/workflow/src/legenddataflow/__init__.py @@ -3,7 +3,6 @@ from .execenv import ( execenv_prefix, execenv_pyexe, - execenv_python, ) from .FileKey import ChannelProcKey, FileKey, ProcessingFileKey from .pars_loading import ParsCatalog @@ -22,7 +21,6 @@ "ProcessingFileKey", "execenv_prefix", "execenv_pyexe", - "execenv_python", "subst_vars", "subst_vars_in_snakemake_config", "unix_time", diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index a34e805..c11b372 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -7,6 +7,7 @@ import shutil import subprocess from pathlib import Path +from typing import Iterable, Mapping import colorlog import dbetto @@ -17,68 +18,105 @@ log = logging.getLogger(__name__) -def execenv_prefix(config, aslist=False): +def _execenv2str(cmd_expr: Iterable, cmd_env: Mapping) -> str: + return " ".join([f"{k}={v}" for k, v in cmd_env.items()]) + " " + " ".join(cmd_expr) + + +def apptainer_env_vars(cmdenv: Mapping) -> list[str]: + return [f"--env={var}={val}" for var, val in cmdenv.items()] + + +def docker_env_vars(cmdenv: Mapping) -> list[str]: + # same syntax + return apptainer_env_vars(cmdenv) + + +def shifter_env_vars(cmdenv: Mapping) -> list[str]: + # same syntax + return apptainer_env_vars(cmdenv) + + +def execenv_prefix( + config: AttrsDict, as_string: bool = True +) -> str | tuple[list, dict]: """Returns the software environment command prefix. For example: `apptainer run image.sif` + + Note + ---- + If `as_string` is True, a space is appended to the returned string. """ config = AttrsDict(config) + cmdline = [] + if "env" in config.execenv: + cmdenv = config.execenv.env + if "execenv" in config and "cmd" in config.execenv and "arg" in config.execenv: cmdline = shlex.split(config.execenv.cmd) - if "env" in config.execenv: - # FIXME: this is not portable, only works with Apptainer and Docker - cmdline += [f"--env={var}={val}" for var, val in config.execenv.env.items()] - cmdenv = {} + has_xdg = False xdg_runtime_dir = os.getenv("XDG_RUNTIME_DIR") if xdg_runtime_dir: - cmdenv["APPTAINER_BINDPATH"] = xdg_runtime_dir + has_xdg = True + + if "env" in config.execenv: + if any(exe in config.execenv.cmd for exe in ("apptainer", "singularity")): + cmdline += apptainer_env_vars(config.execenv.env) + if has_xdg: + cmdline += [f"--bind={xdg_runtime_dir}"] + + elif "docker" in config.execenv.cmd: + cmdline += docker_env_vars(config.execenv.env) + + elif "shifter" in config.execenv.cmd: + cmdline += shifter_env_vars(config.execenv.env) + + if ( + any(exe in config.execenv.cmd for exe in ("docker", "shifter")) + and has_xdg + ): + cmdline += [f"--volume={xdg_runtime_dir}:{xdg_runtime_dir}"] + # now we can add the arguments cmdline += shlex.split(config.execenv.arg) - else: - cmdenv = {} - cmdline = [] - if aslist: - return cmdline, cmdenv - return " ".join(cmdline), cmdenv + if as_string: + return _execenv2str(cmdline, cmdenv) + " " + + return cmdline, cmdenv -def execenv_python(config, aslist=False): +def execenv_pyexe( + config: AttrsDict, exename: str, as_string: bool = True +) -> str | tuple[list, dict]: """Returns the Python interpreter command. 
For example: `apptainer run image.sif python` + + Note + ---- + If `as_string` is True, a space is appended to the returned string. """ config = AttrsDict(config) - cmdline, cmdenv = execenv_prefix(config, aslist=True) - cmdline.append(f"{config.paths.install}/bin/python") - - if aslist: - return cmdline, cmdenv - return " ".join(cmdline), cmdenv + cmdline, cmdenv = execenv_prefix(config, as_string=False) + cmdline.append(f"{config.paths.install}/bin/{exename}") + if as_string: + return _execenv2str(cmdline, cmdenv) + " " -def execenv_pyexe(config, exename): - """Returns the command used to run a legend-dataflow executable for a Snakemake rule. - - For example: `apptainer run image.sif path/to/bindir/` - """ - cmdline, _ = execenv_prefix(config, aslist=True) - # NOTE: space after the executable name - cmdline.append(f"{config.paths.install}/bin/{exename} ") - - return " ".join(cmdline) + return cmdline, cmdenv def dataprod() -> None: - """dataprod's command-line interface for installing and loading the software in the data production environment. + """dataprod's CLI for installing and loading the software in the data production environment. .. code-block:: console $ dataprod --help - $ dataprod exec --help # help section for a specific sub-command + $ dataprod install --help # help section for a specific sub-command """ parser = argparse.ArgumentParser( @@ -139,9 +177,9 @@ def dataprod() -> None: def install(args) -> None: - """ - This function installs user software in the data production environment. - The software packages should be specified in the config.yaml file with the + """Installs user software in the data production environment. + + The software packages should be specified in the `config_file` with the format: ```yaml @@ -149,6 +187,12 @@ def install(args) -> None: - python_package_spec - ... ``` + + .. code-block:: console + + $ dataprod install config.yaml + $ dataprod install --editable config.yaml # install legend-dataflow in editable mode + $ dataprod install --remove config.yaml # remove install directory """ config_dict = AttrsDict(dbetto.utils.load_dict(args.config_file)) config_loc = Path(args.config_file).resolve().parent @@ -166,17 +210,12 @@ def install(args) -> None: shutil.rmtree(path_install) def _runcmd(cmd_expr, cmd_env, **kwargs): - msg = ( - "running: " - + " ".join([f"{k}={v}" for k, v in cmd_env.items()]) - + " " - + " ".join(cmd_expr), - ) + msg = "running: " + _execenv2str(cmd_expr, cmd_env) log.debug(msg) subprocess.run(cmd_expr, env=cmd_env, check=True, **kwargs) - cmd_prefix, cmd_env = execenv_prefix(config_dict, aslist=True) + cmd_prefix, cmd_env = execenv_prefix(config_dict, as_string=False) has_uv = False try: @@ -198,7 +237,7 @@ def _runcmd(cmd_expr, cmd_env, **kwargs): log.info(f"configuring virtual environment in {path_install}") _runcmd(cmd_expr, cmd_env) - python, cmd_env = execenv_python(config_dict, aslist=True) + python, cmd_env = execenv_pyexe(config_dict, "python", as_string=False) if not has_uv: cmd_expr = [ @@ -247,9 +286,7 @@ def _runcmd(cmd_expr, cmd_env, **kwargs): def cmdexec(args) -> None: - """ - This function loads the data production environment and executes a given command. 
- """ + """Load the data production environment and execute a given command.""" config_dict = AttrsDict(dbetto.utils.load_dict(args.config_file)) config_loc = Path(args.config_file).resolve().parent @@ -260,15 +297,10 @@ def cmdexec(args) -> None: ignore_missing=False, ) - cmd_prefix, cmd_env = execenv_prefix(config_dict, aslist=True) + cmd_prefix, cmd_env = execenv_prefix(config_dict, as_string=False) cmd_expr = [*cmd_prefix, *args.command] - msg = ( - "running: " - + " ".join([f"{k}={v}" for k, v in cmd_env.items()]) - + " " - + " ".join(cmd_expr), - ) + msg = "running: " + _execenv2str(cmd_expr, cmd_env) log.debug(msg) subprocess.run(cmd_expr, env=cmd_env, check=True) From 084ab10c74749ee9afea3393854d63856b688c50 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 5 Feb 2025 23:32:01 +0100 Subject: [PATCH 097/101] add another simple execenv test --- tests/test_execenv.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_execenv.py b/tests/test_execenv.py index 4aa354d..f12af17 100644 --- a/tests/test_execenv.py +++ b/tests/test_execenv.py @@ -78,6 +78,17 @@ def test_execenv_prefix(config): "--image=legendexp/legend-base:latest " ) + config = { + "execenv": { + "env": { + "VAR1": "val1", + "VAR2": "val2", + } + } + } + cmd_str = execenv.execenv_prefix(config, as_string=True) + assert cmd_str == "VAR1=val1 VAR2=val2 " + def test_execenv_pyexe(config): cmd_str = execenv.execenv_pyexe(config, "dio-boe") From 6ece274dec9498ba7b0d985723aa969900c2918e Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 6 Feb 2025 12:03:35 +0100 Subject: [PATCH 098/101] round of fixes --- workflow/Snakefile | 4 ++-- workflow/Snakefile-build-raw | 19 ++++++++++--------- workflow/profiles/lngs-build-raw/config.yaml | 2 -- workflow/rules/main.smk | 4 ++-- .../legenddataflow/scripts/complete_run.py | 5 ++--- .../legenddataflow/scripts/write_filelist.py | 3 ++- 6 files changed, 18 insertions(+), 19 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index db7e3c3..861499f 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -80,7 +80,7 @@ onstart: # Make sure some packages are initialized before we begin to avoid race conditions for pkg in ["dspeed", "lgdo", "matplotlib"]: - shell('{swenv} python3 -B -c "import ' + pkg + '"') + shell(execenv.execenv_pyexe(config, "python") + f" -c 'import {pkg}'") # Log parameter catalogs in validity.jsonl files hit_par_cat_file = Path(utils.pars_path(config)) / "hit" / "validity.yaml" @@ -169,4 +169,4 @@ rule gen_filelist: output: temp(Path(utils.filelist_path(config)) / "{label}-{tier}.filelist"), script: - "scripts/write_filelist.py" + "src/legenddataflow/scripts/write_filelist.py" diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw index 6346978..ed48960 100644 --- a/workflow/Snakefile-build-raw +++ b/workflow/Snakefile-build-raw @@ -20,6 +20,9 @@ check_in_cycle = True swenv = execenv.execenv_prefix(config) meta_path = utils.metadata_path(config) det_status = utils.det_status_path(config) +configs = utils.config_path(config) +chan_maps = utils.chan_map_path(config) +meta = utils.metadata_path(config) time = datetime.now().strftime("%Y%m%dT%H%M%SZ") @@ -48,11 +51,10 @@ include: "rules/blinding_check.smk" onstart: - print("INFO: starting workflow") - - # Make sure some packages are initialized before we begin to avoid race conditions - shell('{swenv} python3 -B -c "import daq2lh5 "') + print("INFO: initializing workflow") + # Make sure some packages are initialized before we send jobs to avoid race 
conditions + shell(execenv.execenv_pyexe(config, "python") + " -c 'import daq2lh5'") raw_par_cat_file = Path(utils.pars_path(config)) / "raw" / "validity.yaml" if raw_par_cat_file.is_file(): @@ -61,13 +63,12 @@ onstart: Path(raw_par_cat_file).parent.mkdir(parents=True, exist_ok=True) ParsKeyResolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) except NameError: - print("No raw parameter catalog found") + print("WARNING: no raw parameter catalog found") onsuccess: - print("Workflow finished, no error") - shell("rm *.gen || true") - shell(f"rm {utils.filelist_path(config)}/* || true") + shell("rm -f *.gen") + shell(f"rm -rf {utils.filelist_path(config)}/*") rule gen_filelist: @@ -82,7 +83,7 @@ rule gen_filelist: output: temp(Path(utils.filelist_path(config)) / "{label}-{tier}.filelist"), script: - "scripts/write_filelist.py" + "src/legenddataflow/scripts/write_filelist.py" rule sort_data: diff --git a/workflow/profiles/lngs-build-raw/config.yaml b/workflow/profiles/lngs-build-raw/config.yaml index 73b5cb5..14be322 100644 --- a/workflow/profiles/lngs-build-raw/config.yaml +++ b/workflow/profiles/lngs-build-raw/config.yaml @@ -1,6 +1,4 @@ cores: 30 -restart-times: 2 -max-jobs-per-second: 1 resources: - mem_swap=3500 configfile: config-lngs.yaml diff --git a/workflow/rules/main.smk b/workflow/rules/main.smk index a78784d..d557493 100644 --- a/workflow/rules/main.smk +++ b/workflow/rules/main.smk @@ -47,7 +47,7 @@ rule autogen_output: valid_keys_path=os.path.join(pars_path(config), "valid_keys"), filedb_path=os.path.join(pars_path(config), "filedb"), setup=lambda wildcards: config, - basedir=basedir, + basedir=workflow.basedir, threads: min(workflow.cores, 64) script: - "../scripts/complete_run.py" + "../src/legenddataflow/scripts/complete_run.py" diff --git a/workflow/src/legenddataflow/scripts/complete_run.py b/workflow/src/legenddataflow/scripts/complete_run.py index eff7a90..86dc28f 100644 --- a/workflow/src/legenddataflow/scripts/complete_run.py +++ b/workflow/src/legenddataflow/scripts/complete_run.py @@ -7,9 +7,8 @@ import time from pathlib import Path -from .. import patterns -from .. 
import utils as ut -from ..FileKey import FileKey +from legenddataflow import FileKey, patterns +from legenddataflow import utils as ut print("INFO: dataflow ran successfully, now few final checks and scripts") diff --git a/workflow/src/legenddataflow/scripts/write_filelist.py b/workflow/src/legenddataflow/scripts/write_filelist.py index f27c2ad..edeba98 100644 --- a/workflow/src/legenddataflow/scripts/write_filelist.py +++ b/workflow/src/legenddataflow/scripts/write_filelist.py @@ -1,7 +1,8 @@ # ruff: noqa: F821, T201 -# from snakemake.script import snakemake # snakemake > 8.16 from pathlib import Path +from snakemake.script import snakemake # snakemake > 8.16 + print(f"INFO: found {len(snakemake.input)} files") if len(snakemake.input) == 0: print( From 69660270e46d91acc2604e56f2c446bcb2a6ab26 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 6 Feb 2025 15:01:18 +0100 Subject: [PATCH 099/101] fixes for complete_run.py --- pyproject.toml | 2 +- workflow/Snakefile-build-raw | 2 +- .../legenddataflow/scripts/complete_run.py | 22 +++++++------------ 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3aae00f..ebe2550 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,7 +86,7 @@ docs = [ dataprod = "legenddataflow.execenv:dataprod" create-chankeylist = "legenddataflow.scripts.create_chankeylist:create_chankeylist" merge-channels = "legenddataflow.scripts.merge_channels:merge_channels" -build-filedb = "legenddataflow.scripts.build_filedb:build_filedb" +build-filedb = "legenddataflow.scripts.filedb:build_filedb" build-tier-dsp = "legenddataflow.scripts.tier.dsp:build_tier_dsp" build-tier-evt = "legenddataflow.scripts.tier.evt:build_tier_evt" build-tier-hit = "legenddataflow.scripts.tier.hit:build_tier_hit" diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw index ed48960..c1464cd 100644 --- a/workflow/Snakefile-build-raw +++ b/workflow/Snakefile-build-raw @@ -54,7 +54,7 @@ onstart: print("INFO: initializing workflow") # Make sure some packages are initialized before we send jobs to avoid race conditions - shell(execenv.execenv_pyexe(config, "python") + " -c 'import daq2lh5'") + shell(execenv.execenv_pyexe(config, "python") + " -c 'import daq2lh5, matplotlib'") raw_par_cat_file = Path(utils.pars_path(config)) / "raw" / "validity.yaml" if raw_par_cat_file.is_file(): diff --git a/workflow/src/legenddataflow/scripts/complete_run.py b/workflow/src/legenddataflow/scripts/complete_run.py index 86dc28f..ea4a2b1 100644 --- a/workflow/src/legenddataflow/scripts/complete_run.py +++ b/workflow/src/legenddataflow/scripts/complete_run.py @@ -9,6 +9,7 @@ from legenddataflow import FileKey, patterns from legenddataflow import utils as ut +from legenddataflow.execenv import _execenv2str, execenv_pyexe print("INFO: dataflow ran successfully, now few final checks and scripts") @@ -199,14 +200,14 @@ def build_file_dbs(gen_tier_path, outdir): Path(ut.tmp_log_path(snakemake.params.setup)) / outfile.with_suffix(".log").name ) + print(f"INFO: ......building {outfile}") + pre_cmdline, cmdenv = execenv_pyexe( + snakemake.params.setup, "build-filedb", as_string=False + ) cmdline = [ - *ut.runcmd(snakemake.params.setup, aslist=True), - "--", - "python3", - "-B", - f"{snakemake.params.basedir}/scripts/build_fdb.py", + *pre_cmdline, "--scan-path", spec, "--output", @@ -220,10 +221,8 @@ def build_file_dbs(gen_tier_path, outdir): if speck[0] == "phy": cmdline += ["--assume-nonsparse"] - cmdenv = {} - # TODO: forward stdout to log file - 
processes.add(subprocess.Popen(cmdline)) + processes.add(subprocess.Popen(cmdline, env=cmdenv)) if len(processes) >= snakemake.threads: os.wait() @@ -235,12 +234,7 @@ def build_file_dbs(gen_tier_path, outdir): for p in processes: if p.returncode != 0: - _cmdline = ( - " ".join([f"{k}={v}" for k, v in cmdenv.items()]) - + " " - + " ".join(p.args) - ) - msg = f"at least one FileDB building thread failed: {_cmdline}" + msg = f"at least one FileDB building thread failed: {_execenv2str(p.args, cmdenv)}" raise RuntimeError(msg) toc = time.time() From aff2370c7f4c9b16e270022d032f64db80f6244f Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 6 Feb 2025 15:33:27 +0100 Subject: [PATCH 100/101] move common code into own scripts --- .../scripts/par/geds/dsp/dplms.py | 9 +--- .../scripts/par/geds/dsp/eopt.py | 6 +-- .../scripts/par/geds/dsp/evtsel.py | 32 ++++++-------- .../scripts/par/geds/dsp/tau.py | 32 ++++++-------- .../scripts/par/geds/hit/aoe.py | 33 +++++---------- .../scripts/par/geds/hit/ecal.py | 32 ++++++-------- .../legenddataflow/scripts/par/geds/hit/lq.py | 33 +++++---------- .../legenddataflow/scripts/par/geds/hit/qc.py | 29 ++++--------- .../scripts/par/geds/pht/aoe.py | 37 ++++++---------- .../scripts/par/geds/pht/ecal_part.py | 36 ++++++---------- .../scripts/par/geds/pht/fast.py | 39 +++++++---------- .../legenddataflow/scripts/par/geds/pht/lq.py | 42 +++++++------------ .../legenddataflow/scripts/par/geds/pht/qc.py | 36 +++++----------- .../scripts/par/geds/pht/qc_phy.py | 6 +-- .../scripts/par/geds/psp/average.py | 7 +--- .../scripts/par/geds/raw/blindcal.py | 7 ++-- .../scripts/par/geds/raw/blindcheck.py | 8 ++-- .../scripts/par/geds/tcm/pulser.py | 6 +-- .../legenddataflow/scripts/pulser_removal.py | 30 +++++++++++++ .../src/legenddataflow/scripts/table_name.py | 7 ++++ 20 files changed, 187 insertions(+), 280 deletions(-) create mode 100644 workflow/src/legenddataflow/scripts/pulser_removal.py create mode 100644 workflow/src/legenddataflow/scripts/table_name.py diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py index 16343dc..dabfb21 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py @@ -1,5 +1,4 @@ import argparse -import logging import pickle as pkl import time from pathlib import Path @@ -8,11 +7,11 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata from lgdo import Array, Table from pygama.pargen.dplms_ge_dict import dplms_ge_dict from ....log import build_log +from ...table_name import get_table_name def par_geds_dsp_dplms() -> None: @@ -40,13 +39,9 @@ def par_geds_dsp_dplms() -> None: config_dict = configs["snakemake_rules"]["pars_dsp_dplms"] log = build_log(config_dict, args.log) - - log = logging.getLogger(__name__) sto = lh5.LH5Store() - meta = LegendMetadata(path=args.metadata) - channel_dict = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) configs = TextDB(args.configs).on(args.timestamp, system=args.datatype) dsp_config = config_dict["inputs"]["proc_chain"][args.channel] diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py index 6376ed5..edd215b 100644 --- 
a/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py @@ -11,7 +11,6 @@ from dbetto import TextDB from dbetto.catalog import Props from dspeed.units import unit_registry as ureg -from legendmeta import LegendMetadata from pygama.math.distributions import hpge_peak from pygama.pargen.dsp_optimize import ( BayesianOptimizer, @@ -20,6 +19,7 @@ ) from ....log import build_log +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -59,9 +59,7 @@ def par_geds_dsp_eopt() -> None: sto = lh5.LH5Store() t0 = time.time() - meta = LegendMetadata(path=args.metadata) - channel_dict = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) dsp_config = config_dict["inputs"]["processing_chain"][args.channel] opt_json = config_dict["inputs"]["optimiser_config"][args.channel] diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py index afd4a0b..245cbb2 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py @@ -12,11 +12,12 @@ import pygama.pargen.energy_cal as pgc from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata -from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids +from pygama.pargen.data_cleaning import generate_cuts, get_keys from pygama.pargen.dsp_optimize import run_one_dsp from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -114,9 +115,7 @@ def par_geds_dsp_evtsel() -> None: sto = lh5.LH5Store() t0 = time.time() - meta = LegendMetadata(path=args.metadata) - channel_dict = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) dsp_config = config_dict["inputs"]["processing_chain"][args.channel] peak_json = config_dict["inputs"]["peak_config"][args.channel] @@ -135,21 +134,14 @@ def par_geds_dsp_evtsel() -> None: files = f.read().splitlines() raw_files = sorted(files) - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, peak_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_file, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=peak_dict.get( + "pulser_multiplicity_threshold" + ), + ) raw_dict = Props.read_from(args.raw_cal)[channel]["pars"]["operations"] diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py index a86e531..c966495 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py +++ 
b/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py @@ -6,12 +6,13 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata -from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids +from pygama.pargen.data_cleaning import get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.extract_tau import ExtractTau from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name def par_geds_dsp_tau() -> None: @@ -44,9 +45,7 @@ def par_geds_dsp_tau() -> None: log = build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - channel_dict = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) channel_dict = config_dict["inputs"]["processing_chain"][args.channel] kwarg_dict = config_dict["inputs"]["tau_config"][args.channel] @@ -66,21 +65,14 @@ def par_geds_dsp_tau() -> None: else: input_file = args.raw_files - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_file, + tcm_filelist=args.tcm_files, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get( + "pulser_multiplicity_threshold" + ), + ) data = sto.read( f"{channel}/raw", diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py b/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py index 74ece8e..df2719d 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py @@ -8,14 +8,14 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak -from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from ....convert_np import convert_dict_np_to_float from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -80,9 +80,7 @@ def par_geds_hit_aoe() -> None: log = build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - channel_dict = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) channel_dict = config_dict["inputs"]["aoecal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) @@ -157,23 +155,14 @@ def eres_func(x): return_selection_mask=True, ) - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with 
Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_file, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get( + "pulser_multiplicity_threshold" + ), + ) data["is_pulser"] = mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py b/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py index c763433..3a4e30a 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py @@ -18,13 +18,15 @@ from legendmeta import LegendMetadata from matplotlib.colors import LogNorm from pygama.math.distributions import nb_poly -from pygama.pargen.data_cleaning import get_mode_stdev, get_tcm_pulser_ids +from pygama.pargen.data_cleaning import get_mode_stdev from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from scipy.stats import binned_statistic from ....convert_np import convert_dict_np_to_float from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name mpl.use("agg") sto = lh5.LH5Store() @@ -478,10 +480,11 @@ def par_geds_hit_ecal() -> None: build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - chmap = meta.channelmap(args.timestamp) - channel = f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) + chmap = LegendMetadata(args.metadata).channelmap( + args.timestamp, system=args.datatype + ) det_status = chmap[args.channel]["analysis"]["usability"] if args.in_hit_dict: @@ -529,21 +532,12 @@ def par_geds_hit_ecal() -> None: cal_energy_param="trapTmax", ) - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_file, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get("pulser_multiplicity_threshold"), + ) data["is_pulser"] = mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py b/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py index b4dc3f2..6bacb36 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py @@ -8,16 +8,16 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 -from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data from ....convert_np import convert_dict_np_to_float from ....log import build_log 
+from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -79,9 +79,7 @@ def par_geds_hit_lq() -> None: log = build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - channel_dict = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) @@ -135,23 +133,14 @@ def eres_func(x): return_selection_mask=True, ) - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_file, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get( + "pulser_multiplicity_threshold" + ), + ) data["is_pulser"] = mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py b/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py index 33934c4..0f9387c 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py @@ -10,17 +10,17 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( generate_cut_classifiers, get_keys, - get_tcm_pulser_ids, ) from pygama.pargen.utils import load_data from ....convert_np import convert_dict_np_to_float from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -62,9 +62,7 @@ def par_geds_hit_qc() -> None: log = build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - chmap = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) # get metadata dictionary channel_dict = config_dict["inputs"]["qc_config"][args.channel] @@ -198,21 +196,12 @@ def par_geds_hit_qc() -> None: cal_energy_param="trapTmax", ) - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_file, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get("pulser_multiplicity_threshold"), + ) data["is_pulser"] = 
mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py b/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py index 76383ef..fd21aa3 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py @@ -14,11 +14,12 @@ from legendmeta import LegendMetadata from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak -from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from ....FileKey import ChannelProcKey, ProcessingFileKey from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -264,9 +265,7 @@ def par_geds_pht_aoe() -> None: build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - chmap = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) channel_dict = config_dict["inputs"]["par_pht_aoecal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) @@ -345,26 +344,16 @@ def par_geds_pht_aoe() -> None: return_selection_mask=True, ) - if args.pulser_files: - mask = np.array([], dtype=bool) - for file in args.pulser_files: - pulser_dict = Props.read_from(file) - pulser_mask = np.array(pulser_dict["mask"]) - mask = np.append(mask, pulser_mask) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_files, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get( + "pulser_multiplicity_threshold" + ), + ) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") data["is_pulser"] = mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py b/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py index 6d9babf..ccee7c1 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py @@ -15,12 +15,13 @@ from dbetto.catalog import Props from legendmeta import LegendMetadata from pygama.math.distributions import nb_poly -from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from ....FileKey import ChannelProcKey, ProcessingFileKey from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -464,9 +465,8 @@ def calibrate_partition( log = build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - chmap = meta.channelmap(args.timestamp, system=args.datatype) - channel = 
f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) + chmap = LegendMetadata(path=args.metadata).on(args.timestamp, system=args.datatype) cal_dict = {} results_dicts = {} @@ -529,26 +529,14 @@ def calibrate_partition( cal_energy_param=kwarg_dict["energy_params"][0], ) - if args.pulser_files: - mask = np.array([], dtype=bool) - for file in args.pulser_files: - pulser_dict = Props.read_from(file) - pulser_mask = np.array(pulser_dict["mask"]) - mask = np.append(mask, pulser_mask) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_files, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get("pulser_multiplicity_threshold"), + ) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") data["is_pulser"] = mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py b/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py index c3089e1..c1ac946 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py @@ -11,7 +11,6 @@ from dbetto import TextDB from dbetto.catalog import Props from legendmeta import LegendMetadata -from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from workflow.src.legenddataflow.scripts.par.geds.pht.aoe import run_aoe_calibration from workflow.src.legenddataflow.scripts.par.geds.pht.lq import run_lq_calibration @@ -19,6 +18,8 @@ from ....FileKey import ChannelProcKey, ProcessingFileKey from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -85,9 +86,10 @@ def par_geds_pht_fast() -> None: build_log(config_dict["pars_pht_partcal"], args.log) - meta = LegendMetadata(path=args.metadata) - chmap = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) + chmap = LegendMetadata(args.metadata).channelmap( + args.timestamp, system=args.datatype + ) cal_dict = {} results_dicts = {} @@ -186,27 +188,14 @@ def par_geds_pht_fast() -> None: cal_energy_param=kwarg_dict["energy_params"][0], ) - if args.pulser_files: - mask = np.array([], dtype=bool) - for file in args.pulser_files: - with Path(file).open() as f: - pulser_dict = json.load(f) - pulser_mask = np.array(pulser_dict["mask"]) - mask = np.append(mask, pulser_mask) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, 
kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_files, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get("pulser_multiplicity_threshold"), + ) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") data["is_pulser"] = mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py b/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py index f46914c..12cd06d 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py @@ -14,13 +14,14 @@ from legendmeta import LegendMetadata from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 -from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data -from ..FileKey import ChannelProcKey, ProcessingFileKey -from ..log import build_log +from ....FileKey import ChannelProcKey, ProcessingFileKey +from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -281,9 +282,7 @@ def eres_func(x): log = build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - chmap = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) @@ -353,27 +352,16 @@ def eres_func(x): return_selection_mask=True, ) - if args.pulser_files: - mask = np.array([], dtype=bool) - for file in args.pulser_files: - with Path(file).open() as f: - pulser_dict = json.load(f) - pulser_mask = np.array(pulser_dict["mask"]) - mask = np.append(mask, pulser_mask) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_files, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get( + "pulser_multiplicity_threshold" + ), + ) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") data["is_pulser"] = mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py b/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py index c6ac3df..89c9f4d 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py @@ -10,17 +10,17 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( generate_cut_classifiers, get_keys, - get_tcm_pulser_ids, ) from pygama.pargen.utils import 
load_data from ....convert_np import convert_dict_np_to_float from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -64,9 +64,7 @@ log = build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - chmap = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) # get metadata dictionary channel_dict = config_dict["inputs"]["qc_config"][args.channel] @@ -220,26 +218,14 @@ cal_energy_param="trapTmax", ) - if args.pulser_files: - total_mask = np.array([], dtype=bool) - for file in args.pulser_files: - pulser_dict = Props.read_from(file) - pulser_mask = np.array(pulser_dict["mask"]) - total_mask = np.append(total_mask, pulser_mask) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, total_mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + total_mask = get_pulser_mask( + pulser_file=args.pulser_files, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get("pulser_multiplicity_threshold"), + ) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") data["is_pulser"] = total_mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py b/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py index 9007ad7..fa11226 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py @@ -11,7 +11,6 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( generate_cut_classifiers, @@ -20,6 +19,7 @@ from ....convert_np import convert_dict_np_to_float from ....log import build_log +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -52,9 +52,7 @@ log = build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - chmap = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) # get metadata dictionary channel_dict = config_dict["qc_config"][args.channel] diff --git a/workflow/src/legenddataflow/scripts/par/geds/psp/average.py b/workflow/src/legenddataflow/scripts/par/geds/psp/average.py index 3ba1423..99bb28a 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/psp/average.py +++ b/workflow/src/legenddataflow/scripts/par/geds/psp/average.py @@ -7,8 +7,7 @@ import matplotlib.dates as mdates import matplotlib.pyplot as plt import numpy as np -from dbetto.catalog import Props -from legendmeta import LegendMetadata +from dbetto.catalog import Props, TextDB from ....FileKey import ChannelProcKey @@ -44,9 +43,7 @@ def par_geds_psp_average() -> None: argparser.add_argument("--channel", help="Channel", type=str, required=True) 
args = argparser.parse_args() - configs = LegendMetadata(args.configs, lazy=True).on( - args.timestamp, system=args.datatype - ) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) merge_config = Props.read_from( configs["snakemake_rules"]["pars_psp"]["inputs"]["psp_config"][args.channel] ) diff --git a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py index eeaaf2b..e1d8b30 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py @@ -13,10 +13,11 @@ import matplotlib.pyplot as plt import numpy as np from dbetto.catalog import Props -from legendmeta import LegendMetadata from lgdo import lh5 from pygama.pargen.energy_cal import HPGeCalibration +from ...table_name import get_table_name + mpl.use("agg") @@ -45,9 +46,7 @@ def par_geds_raw_blindcal() -> None: logging.getLogger("matplotlib").setLevel(logging.INFO) log = logging.getLogger(__name__) - meta = LegendMetadata(path=args.meta) - channel_dict = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + channel = get_table_name(args.meta, args.timestamp, args.datatype, args.channel) # peaks to search for peaks_keV = np.array( diff --git a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py index 5f60c54..165edb4 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py @@ -22,6 +22,7 @@ from pygama.pargen.energy_cal import get_i_local_maxima from ....log import build_log +from ...table_name import get_table_name mpl.use("Agg") @@ -48,10 +49,9 @@ def par_geds_raw_blindcheck() -> None: log = build_log(config_dict, args.log) # get the usability status for this channel - chmap = LegendMetadata(args.metadata, lazy=True).channelmap( - args.timestamp, system=args.datatype - ) - channel = f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) + + chmap = LegendMetadata(args.meta).channelmap(args.timestamp, system=args.datatype) det_status = chmap[args.channel]["analysis"]["is_blinded"] # read in calibration curve for this channel diff --git a/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py b/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py index 4c75d62..1b0cceb 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py +++ b/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py @@ -4,10 +4,10 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata from pygama.pargen.data_cleaning import get_tcm_pulser_ids from ....log import build_log +from ...table_name import get_table_name def par_geds_tcm_pulser() -> None: @@ -35,9 +35,7 @@ def par_geds_tcm_pulser() -> None: kwarg_dict = config_dict["inputs"]["pulser_config"] kwarg_dict = Props.read_from(kwarg_dict) - meta = LegendMetadata(path=args.metadata) - channel_dict = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{channel_dict[args.channel].daq.rawid}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) if ( isinstance(args.tcm_files, list) diff --git a/workflow/src/legenddataflow/scripts/pulser_removal.py b/workflow/src/legenddataflow/scripts/pulser_removal.py new 
file mode 100644 index 0000000..40b3045 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/pulser_removal.py @@ -0,0 +1,30 @@ +from pathlib import Path + +import numpy as np +from dbetto.catalog import Props +from pygama.pargen.data_cleaning import get_tcm_pulser_ids + + +def get_pulser_mask( + pulser_file=None, tcm_filelist=None, channel=None, pulser_multiplicity_threshold=10 +): + if pulser_file is not None: + if not isinstance(pulser_file, list): + pulser_file = [pulser_file] + mask = np.array([], dtype=bool) + for file in pulser_file: + pulser_dict = Props.read_from(file) + pulser_mask = np.array(pulser_dict["mask"]) + mask = np.append(mask, pulser_mask) + + elif tcm_filelist is not None: + # get pulser mask from tcm files + with Path(tcm_filelist).open() as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + _, mask = get_tcm_pulser_ids(tcm_files, channel, pulser_multiplicity_threshold) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + + return mask diff --git a/workflow/src/legenddataflow/scripts/table_name.py b/workflow/src/legenddataflow/scripts/table_name.py new file mode 100644 index 0000000..653bbb1 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/table_name.py @@ -0,0 +1,7 @@ +from legendmeta import LegendMetadata + + +def get_table_name(metadata_path, timestamp, datatype, detector): + meta = LegendMetadata(path=metadata_path) + channel_dict = meta.channelmap(timestamp, system=datatype) + return f"ch{channel_dict[detector].daq.rawid:07}" From cbb29c85ca51aa3e049cd2127d2929dd62d1299e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 6 Feb 2025 16:16:19 +0100 Subject: [PATCH 101/101] make table name lazy --- workflow/src/legenddataflow/scripts/table_name.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/src/legenddataflow/scripts/table_name.py b/workflow/src/legenddataflow/scripts/table_name.py index 653bbb1..935e811 100644 --- a/workflow/src/legenddataflow/scripts/table_name.py +++ b/workflow/src/legenddataflow/scripts/table_name.py @@ -2,6 +2,6 @@ def get_table_name(metadata_path, timestamp, datatype, detector): - meta = LegendMetadata(path=metadata_path) + meta = LegendMetadata(path=metadata_path, lazy=True) channel_dict = meta.channelmap(timestamp, system=datatype) return f"ch{channel_dict[detector].daq.rawid:07}"