From 3284d6129004460107eb94182b9efc32d7793e8e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 9 Oct 2024 17:10:29 +0200 Subject: [PATCH 001/101] add ann tier --- Snakefile | 1 + rules/ann.smk | 50 ++++++++++++++++ rules/evt.smk | 72 ++++++++++++++++++++++- scripts/build_ann.py | 124 +++++++++++++++++++++++++++++++++++++++ scripts/build_evt.py | 18 ++++-- scripts/util/patterns.py | 26 ++++++++ scripts/util/utils.py | 12 ++++ templates/config.json | 2 + 8 files changed, 298 insertions(+), 7 deletions(-) create mode 100644 rules/ann.smk create mode 100644 scripts/build_ann.py diff --git a/Snakefile b/Snakefile index 4738359..5069de0 100644 --- a/Snakefile +++ b/Snakefile @@ -59,6 +59,7 @@ include: "rules/psp.smk" include: "rules/hit.smk" include: "rules/pht.smk" include: "rules/pht_fast.smk" +include: "rules/ann.smk" include: "rules/evt.smk" include: "rules/skm.smk" include: "rules/blinding_calibration.smk" diff --git a/rules/ann.smk b/rules/ann.smk new file mode 100644 index 0000000..f7e6b1c --- /dev/null +++ b/rules/ann.smk @@ -0,0 +1,50 @@ +""" +Snakemake rules for processing ann tier. This is done only for the coax detectors +to apply the ann and risetime cuts for psd. + +""" + +from scripts.util.pars_loading import pars_catalog +from scripts.util.utils import par_dsp_path +from scripts.util.patterns import ( + get_pattern_tier_dsp, + get_pattern_tier_psp, + get_pattern_tier_ann, + get_pattern_tier, + get_pattern_log, + get_pattern_pars, + get_pattern_pars_overwrite, +) + +for tier in ["ann", "pan"]: + + rule: + input: + dsp_file=get_pattern_tier_dsp(setup) if tier == "ann" else get_pattern_tier_psp(setup), + pars_file=lambda wildcards: get_svm_file(wildcards, "ann", "cuts"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + output: + tier_file=get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(setup, f"{tier}_db"), + log: + get_pattern_log(setup, f"tier_{tier}"), + group: + "tier-ann" + resources: + runtime=300, + mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/build_ann.py')} " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--input {input.dsp_file} " + "--output {output.tier_file} " + "--db_file {output.db_file} " + "--pars_file {input.pars_file} " + + set_last_rule_name(workflow, f"build_{tier}") \ No newline at end of file diff --git a/rules/evt.smk b/rules/evt.smk index ed20d2d..1026d9b 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -9,6 +9,8 @@ from scripts.util.patterns import ( get_pattern_tier_tcm, get_pattern_tier_pht, get_pattern_tier_psp, + get_pattern_tier_pan, + get_pattern_tier_ann, get_pattern_tier, get_pattern_log, get_pattern_pars, @@ -31,6 +33,18 @@ for tier in ("evt", "pet"): else get_pattern_tier_pht(setup) ), tcm_file=get_pattern_tier_tcm(setup), + ann_file=lambda wildcards: ( + get_pattern_tier_ann(setup) + if tier == "evt" + else get_pattern_tier_pan(setup) + ), + # needs snakemake >= 8.3 + # ann_file= branch( + # lambda wildcards: tier if int(wildcards["period"][1:]) <= 11 else False, + # cases = {"evt":get_pattern_tier_ann(setup), + # "pet":get_pattern_tier_pan(setup), + # } + # ), xtalk_matrix=lambda wildcards: get_svm_file( tier=tier, wildcards=wildcards, name="xtc" ), @@ -63,10 +77,66 @@ for tier in ("evt", "pet"): "--par_files {input.par_files} " "--hit_file {input.hit_file} " "--tcm_file {input.tcm_file} " + "--ann_file {input.ann_file} 
" "--dsp_file {input.dsp_file} " "--output {output.evt_file} " - set_last_rule_name(workflow, f"build_{tier}") + set_last_rule_name(workflow, f"build_{tier}_with_ann") + # ann_rule = list(workflow.rules)[-1] + + # rule: + # input: + # dsp_file=( + # get_pattern_tier_dsp(setup) + # if tier == "evt" + # else get_pattern_tier_psp(setup) + # ), + # hit_file=( + # get_pattern_tier_hit(setup) + # if tier == "evt" + # else get_pattern_tier_pht(setup) + # ), + # tcm_file=get_pattern_tier_tcm(setup), + # xtalk_matrix=lambda wildcards: get_svm_file( + # tier=tier, wildcards=wildcards, name="xtc" + # ), + # par_files=lambda wildcards: pars_catalog.get_par_file( + # setup, wildcards.timestamp, "pht" + # ), + # output: + # evt_file=get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), + # params: + # timestamp="{timestamp}", + # datatype="{datatype}", + # tier=tier, + # log: + # get_pattern_log(setup, f"tier_{tier}"), + # group: + # "tier-evt" + # resources: + # runtime=300, + # mem_swap=50, + # shell: + # "{swenv} python3 -B " + # f"{workflow.source_path('../scripts/build_evt.py')} " + # "--configs {configs} " + # "--metadata {meta} " + # "--log {log} " + # "--tier {params.tier} " + # "--datatype {params.datatype} " + # "--timestamp {params.timestamp} " + # "--xtc_file {input.xtalk_matrix} " + # "--par_files {input.par_files} " + # "--hit_file {input.hit_file} " + # "--tcm_file {input.tcm_file} " + # "--dsp_file {input.dsp_file} " + # "--output {output.evt_file} " + + # set_last_rule_name(workflow, f"build_{tier}") + # no_ann_rule = list(workflow.rules)[-1] + + # rule_order_list = [ann_rule, no_ann_rule] + # workflow._ruleorder.add(*rule_order_list) rule: wildcard_constraints: diff --git a/scripts/build_ann.py b/scripts/build_ann.py new file mode 100644 index 0000000..1f0f67f --- /dev/null +++ b/scripts/build_ann.py @@ -0,0 +1,124 @@ +import argparse +import json +import logging +import os +import pathlib +import re +import time +import warnings + +os.environ["LGDO_CACHE"] = "false" +os.environ["LGDO_BOUNDSCHECK"] = "false" +os.environ["DSPEED_CACHE"] = "false" +os.environ["DSPEED_BOUNDSCHECK"] = "false" + +import lgdo.lh5 as lh5 +import numpy as np +from dspeed import build_dsp +from legendmeta import LegendMetadata +from legendmeta.catalog import Props + + +def replace_list_with_array(dic): + for key, value in dic.items(): + if isinstance(value, dict): + dic[key] = replace_list_with_array(value) + elif isinstance(value, list): + dic[key] = np.array(value, dtype="float32") + else: + pass + return dic + + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + +argparser = argparse.ArgumentParser() +argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) +argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) +argparser.add_argument("--log", help="log file", type=str) +argparser.add_argument("--input", help="input file", type=str) +argparser.add_argument("--output", help="output file", type=str) +argparser.add_argument("--db_file", help="db file", type=str) +args = argparser.parse_args() + +pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) +logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +logging.getLogger("numba").setLevel(logging.INFO) +logging.getLogger("parse").setLevel(logging.INFO) 
+logging.getLogger("lgdo").setLevel(logging.INFO) +log = logging.getLogger(__name__) + +configs = LegendMetadata(path=args.configs) +channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_ann"][ + "inputs" +]["processing_chain"] + +channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} +db_files = [ + par_file + for par_file in args.pars_file + if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yml" +] + +database_dic = Props.read_from(db_files, subst_pathvar=True) +database_dic = replace_list_with_array(database_dic) + +pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) + +rng = np.random.default_rng() +rand_num = f"{rng.integers(0,99999):05d}" +temp_output = f"{args.output}.{rand_num}" + +start = time.time() + +build_dsp( + args.input, + temp_output, + {}, + database=database_dic, + chan_config=channel_dict, + write_mode="r", + buffer_len=3200 if args.datatype == "cal" else 3200, + block_width=16, +) + +log.info(f"build_ann finished in {time.time()-start}") + +os.rename(temp_output, args.output) + +if "ann" in args.output: + key = os.path.basename(args.output).replace("-tier_ann.lh5", "") +else: + key = os.path.basename(args.output).replace("-tier_pan.lh5", "") + +raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] + +raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] + +outputs = {} +channels = [] +for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + +full_dict = { + "valid_fields": { + "ann": outputs, + }, + "valid_keys": {key: {"valid_channels": {"ann": channels}}}, +} +pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) +with open(args.db_file, "w") as w: + json.dump(full_dict, w, indent=4) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 1fcd347..5a808b2 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -35,6 +35,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): argparser.add_argument("--hit_file", help="hit file", type=str) argparser.add_argument("--dsp_file", help="dsp file", type=str) argparser.add_argument("--tcm_file", help="tcm file", type=str) +argparser.add_argument("--ann_file", help="ann file") argparser.add_argument("--xtc_file", help="xtc file", type=str) argparser.add_argument("--par_files", help="par files", nargs="*") @@ -125,13 +126,18 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.output}.{rand_num}" +file_table = { + "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), + "dsp": (args.dsp_file, "dsp", "ch{}"), + "hit": (args.hit_file, "hit", "ch{}"), + "evt": (None, "evt"), +} + +if args.ann_file is not None: + file_table["ann"] = (args.ann_file, "dsp", "ch{}") + table = build_evt( - { - "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), - "dsp": (args.dsp_file, "dsp", "ch{}"), - "hit": (args.hit_file, "hit", "ch{}"), - "evt": (None, "evt"), - }, + file_table, evt_config, ) diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 79bcaac..2629e7e 
100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -16,10 +16,12 @@ pars_path, plts_path, sandbox_path, + tier_ann_path, tier_daq_path, tier_dsp_path, tier_evt_path, tier_hit_path, + tier_pan_path, tier_path, tier_pet_path, tier_pht_path, @@ -137,6 +139,16 @@ def get_pattern_tier_hit(setup): ) +def get_pattern_tier_ann(setup): + return os.path.join( + f"{tier_ann_path(setup)}", + "{datatype}", + "{period}", + "{run}", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_ann.lh5", + ) + + def get_pattern_tier_evt(setup): return os.path.join( f"{tier_evt_path(setup)}", @@ -175,6 +187,16 @@ def get_pattern_tier_pht(setup): ) +def get_pattern_tier_pan(setup): + return os.path.join( + f"{tier_pan_path(setup)}", + "{datatype}", + "{period}", + "{run}", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pan.lh5", + ) + + def get_pattern_tier_pet(setup): return os.path.join( f"{tier_pet_path(setup)}", @@ -212,6 +234,8 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): file_pattern = get_pattern_tier_dsp(setup) elif tier == "hit": file_pattern = get_pattern_tier_hit(setup) + elif tier == "ann": + file_pattern = get_pattern_tier_ann(setup) elif tier == "evt": file_pattern = get_pattern_tier_evt(setup) elif tier == "evt_concat": @@ -220,6 +244,8 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): file_pattern = get_pattern_tier_psp(setup) elif tier == "pht": file_pattern = get_pattern_tier_pht(setup) + elif tier == "pan": + file_pattern = get_pattern_tier_pan(setup) elif tier == "pet": file_pattern = get_pattern_tier_pet(setup) elif tier == "pet_concat": diff --git a/scripts/util/utils.py b/scripts/util/utils.py index f3f3ebc..5ec88b0 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -53,6 +53,10 @@ def tier_hit_path(setup): return setup["paths"]["tier_hit"] +def tier_ann_path(setup): + return setup["paths"]["tier_ann"] + + def tier_evt_path(setup): return setup["paths"]["tier_evt"] @@ -65,6 +69,10 @@ def tier_pht_path(setup): return setup["paths"]["tier_pht"] +def tier_pan_path(setup): + return setup["paths"]["tier_pan"] + + def tier_pet_path(setup): return setup["paths"]["tier_pet"] @@ -82,12 +90,16 @@ def get_tier_path(setup, tier): return tier_dsp_path(setup) elif tier == "hit": return tier_hit_path(setup) + elif tier == "ann": + return tier_ann_path(setup) elif tier == "evt": return tier_evt_path(setup) elif tier == "psp": return tier_psp_path(setup) elif tier == "pht": return tier_pht_path(setup) + elif tier == "pan": + return tier_pan_path(setup) elif tier == "pet": return tier_pet_path(setup) elif tier == "skm": diff --git a/templates/config.json b/templates/config.json index 7d17f71..a86db97 100644 --- a/templates/config.json +++ b/templates/config.json @@ -19,9 +19,11 @@ "tier_tcm": "$_/generated/tier/tcm", "tier_dsp": "$_/generated/tier/dsp", "tier_hit": "$_/generated/tier/hit", + "tier_ann": "$_/generated/tier/ann", "tier_evt": "$_/generated/tier/evt", "tier_psp": "$_/generated/tier/psp", "tier_pht": "$_/generated/tier/pht", + "tier_pan": "$_/generated/tier/pan", "tier_pet": "$_/generated/tier/pet", "tier_skm": "$_/generated/tier/skm", From 26d52f25c6565cb8cd3af147c0e13dfb61cf1877 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sun, 20 Oct 2024 14:55:31 +0200 Subject: [PATCH 002/101] allow more jobs --- rules/ann.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/ann.smk b/rules/ann.smk index f7e6b1c..ff24820 100644 --- a/rules/ann.smk +++ b/rules/ann.smk @@ -34,7 +34,7 @@ for tier in ["ann", 
"pan"]: "tier-ann" resources: runtime=300, - mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, + mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: "{swenv} python3 -B " f"{workflow.source_path('../scripts/build_ann.py')} " From 7918e830a4ce913166787b89f0f526bea7051ea8 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 21 Oct 2024 23:10:29 +0200 Subject: [PATCH 003/101] pc cleanup --- rules/ann.smk | 10 +++++++--- scripts/build_ann.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/rules/ann.smk b/rules/ann.smk index ff24820..64cdd50 100644 --- a/rules/ann.smk +++ b/rules/ann.smk @@ -20,7 +20,11 @@ for tier in ["ann", "pan"]: rule: input: - dsp_file=get_pattern_tier_dsp(setup) if tier == "ann" else get_pattern_tier_psp(setup), + dsp_file=( + get_pattern_tier_dsp(setup) + if tier == "ann" + else get_pattern_tier_psp(setup) + ), pars_file=lambda wildcards: get_svm_file(wildcards, "ann", "cuts"), params: timestamp="{timestamp}", @@ -46,5 +50,5 @@ for tier in ["ann", "pan"]: "--output {output.tier_file} " "--db_file {output.db_file} " "--pars_file {input.pars_file} " - - set_last_rule_name(workflow, f"build_{tier}") \ No newline at end of file + + set_last_rule_name(workflow, f"build_{tier}") diff --git a/scripts/build_ann.py b/scripts/build_ann.py index 1f0f67f..224877a 100644 --- a/scripts/build_ann.py +++ b/scripts/build_ann.py @@ -90,7 +90,7 @@ def replace_list_with_array(dic): if "ann" in args.output: key = os.path.basename(args.output).replace("-tier_ann.lh5", "") -else: +else: key = os.path.basename(args.output).replace("-tier_pan.lh5", "") raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] From e9561bdf62f0dc542721643ad8376e105e8b34c5 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 21 Oct 2024 23:10:40 +0200 Subject: [PATCH 004/101] bump pkg versions --- templates/config.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/config.json b/templates/config.json index a86db97..9fd0d0f 100644 --- a/templates/config.json +++ b/templates/config.json @@ -55,9 +55,9 @@ "arg": "/data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif" }, "pkg_versions": { - "pygama": "pygama==2.0.1", + "pygama": "pygama==2.0.3", "pylegendmeta": "pylegendmeta==0.10.2", - "dspeed": "dspeed==1.4.0a1", + "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.7.0", "legend-daq2lh5": "legend-daq2lh5==1.2.1" } From a3c0dae6588ac4bbaeacabceb8602c3826ef55f2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 21 Oct 2024 23:18:39 +0200 Subject: [PATCH 005/101] add ml packages --- templates/config.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/templates/config.json b/templates/config.json index 9fd0d0f..0d1320d 100644 --- a/templates/config.json +++ b/templates/config.json @@ -59,7 +59,10 @@ "pylegendmeta": "pylegendmeta==0.10.2", "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.7.0", - "legend-daq2lh5": "legend-daq2lh5==1.2.1" + "legend-daq2lh5": "legend-daq2lh5==1.2.1", + "tensorflow": "tensorflow==2.17", + "keras": "keras==3.6.0", + "jax": "jax==0.4.30" } } } From 818511da149ae57f954a4a5fa9aaba075e1ddfa2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 15:15:38 +0100 Subject: [PATCH 006/101] refactor for new metadata, clean up patterns and some naming --- scripts/build_dsp.py | 4 +- scripts/create_chankeylist.py | 2 +- scripts/util/CalibCatalog.py | 128 ------ 
.../util/{dataset_cal.py => cal_grouping.py} | 13 +- scripts/util/catalog.py | 191 ++++++++ scripts/util/create_pars_keylist.py | 11 +- scripts/util/pars_loading.py | 8 +- scripts/util/patterns.py | 407 +++--------------- scripts/util/utils.py | 134 ++---- 9 files changed, 309 insertions(+), 589 deletions(-) delete mode 100644 scripts/util/CalibCatalog.py rename scripts/util/{dataset_cal.py => cal_grouping.py} (92%) create mode 100644 scripts/util/catalog.py diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 8dad8fa..cbd0794 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -51,9 +51,7 @@ def replace_list_with_array(dic): channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} db_files = [ - par_file - for par_file in args.pars_file - if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yaml" + par_file for par_file in args.pars_file if os.path.splitext(par_file)[1] in (".json", ".yaml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) diff --git a/scripts/create_chankeylist.py b/scripts/create_chankeylist.py index c4c6cb9..435f55c 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -20,7 +20,7 @@ chmap = channel_map.channelmaps.on(args.timestamp) channels = [ - f"ch{chmap[chan].daq.rawid:03}" + chan for chan in status_map if status_map[chan]["processable"] is True and chmap[chan].system == "geds" ] diff --git a/scripts/util/CalibCatalog.py b/scripts/util/CalibCatalog.py deleted file mode 100644 index b222c5d..0000000 --- a/scripts/util/CalibCatalog.py +++ /dev/null @@ -1,128 +0,0 @@ -# -# Copyright (C) 2015 Oliver Schulz -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -""" -This module stores the scripts for leading validity files based on timestamp and system -""" - -import bisect -import collections -import copy -import json -import types -from collections import namedtuple -from pathlib import Path - -from .utils import unix_time - - -class Props: - @staticmethod - def read_from(sources): - def read_impl(sources): - if isinstance(sources, (str, Path)): - file_name = sources - with open(file_name) as file: - return json.load(file) - elif isinstance(sources, list): - result = {} - for p in map(read_impl, sources): - Props.add_to(result, p) - return result - else: - msg = f"Can't run Props.read_from on sources-value of type {type(sources)}" - raise ValueError(msg) - - return read_impl(sources) - - @staticmethod - def add_to(props_a, props_b): - a = props_a - b = props_b - - for key in b: - if key in a: - if isinstance(a[key], dict) and isinstance(b[key], dict): - Props.add_to(a[key], b[key]) - elif a[key] != b[key]: - a[key] = copy.copy(b[key]) - else: - a[key] = copy.copy(b[key]) - - -class PropsStream: - @staticmethod - def get(value): - if isinstance(value, (str, Path)): - return PropsStream.read_from(value) - elif isinstance(value, (collections.abc.Sequence, types.GeneratorType)): - return value - else: - msg = f"Can't get PropsStream from value of type {type(value)}" - raise ValueError(msg) - - @staticmethod - def read_from(file_name): - with open(file_name) as file: - for json_str in file: - yield json.loads(json_str) - - -class CalibCatalog(namedtuple("CalibCatalog", ["entries"])): - __slots__ = () - - class Entry(namedtuple("Entry", ["valid_from", "file"])): - __slots__ = () - - @staticmethod - def read_from(file_name): - entries = {} - - for props in PropsStream.get(file_name): - timestamp = props["valid_from"] - system = "all" if props.get("category") is None else props["category"] - file_key = props["apply"] - if system not in entries: - entries[system] = [] - entries[system].append(CalibCatalog.Entry(unix_time(timestamp), file_key)) - - for system in entries: - entries[system] = sorted(entries[system], key=lambda entry: entry.valid_from) - return CalibCatalog(entries) - - def calib_for(self, timestamp, category="all", allow_none=False): - if category in self.entries: - valid_from = [entry.valid_from for entry in self.entries[category]] - pos = bisect.bisect_right(valid_from, unix_time(timestamp)) - if pos > 0: - return self.entries[category][pos - 1].file - else: - if allow_none: - return None - else: - msg = f"No valid calibration found for timestamp: {timestamp}, category: {category}" - raise RuntimeError(msg) - else: - if allow_none: - return None - else: - msg = f"No calibrations found for category: {category}" - raise RuntimeError(msg) - - @staticmethod - def get_calib_files(catalog_file, timestamp, category="all"): - catalog = CalibCatalog.read_from(catalog_file) - return CalibCatalog.calib_for(catalog, timestamp, category) diff --git a/scripts/util/dataset_cal.py b/scripts/util/cal_grouping.py similarity index 92% rename from scripts/util/dataset_cal.py rename to scripts/util/cal_grouping.py index 693e934..aec1572 100644 --- a/scripts/util/dataset_cal.py +++ b/scripts/util/cal_grouping.py @@ -14,12 +14,23 @@ from .utils import filelist_path -class dataset_file: +class cal_grouping: def __init__(self, setup, input_file): with open(input_file) as r: self.datasets = json.load(r) + self.expand_runs() self.setup = setup + def expand_runs(self): + for channel, chan_dict in self.datasets.items(): + for part, part_dict in 
chan_dict.items(): + for per, runs in part_dict.items(): + if isinstance(runs, str) and ".." in runs: + start, end = runs.split("..") + self.datasets[channel][part][per] = [ + f"r{x:02}" for x in range(int(start[2:]), int(end) + 1) + ] + def get_dataset(self, dataset, channel): partition_dict = self.datasets["default"].copy() if channel in self.datasets: diff --git a/scripts/util/catalog.py b/scripts/util/catalog.py new file mode 100644 index 0000000..1fb516b --- /dev/null +++ b/scripts/util/catalog.py @@ -0,0 +1,191 @@ +# +# Copyright (C) 2015 Oliver Schulz +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +This module stores the scripts for leading validity files based on timestamp and system +""" + +import bisect +import collections +import copy +import json +import types +from collections import namedtuple +from pathlib import Path + +import yaml + +from .utils import unix_time + + +class Props: + @staticmethod + def read_from(sources): + def read_impl(sources): + if isinstance(sources, (str, Path)): + file_name = sources + if isinstance(file_name, str): + file_name = Path(file_name) + if file_name.suffix in (".yaml", ".yml"): + with file_name.open() as file: + return yaml.safe_load(file) + elif file_name.suffix == ".json": + with open(file_name) as file: + return json.load(file) + else: + msg = f"Can't run Props.read_from on file with suffix {file_name.suffix}" + raise ValueError(msg) + elif isinstance(sources, list): + result = {} + for p in map(read_impl, sources): + Props.add_to(result, p) + return result + else: + msg = f"Can't run Props.read_from on sources-value of type {type(sources)}" + raise ValueError(msg) + + return read_impl(sources) + + @staticmethod + def add_to(props_a, props_b): + a = props_a + b = props_b + + for key in b: + if key in a: + if isinstance(a[key], dict) and isinstance(b[key], dict): + Props.add_to(a[key], b[key]) + elif a[key] != b[key]: + a[key] = copy.copy(b[key]) + else: + a[key] = copy.copy(b[key]) + + +class PropsStream: + """Simple class to control loading of validity.yaml files""" + + @staticmethod + def get(value): + if isinstance(value, str): + return PropsStream.read_from(value) + + if isinstance(value, (collections.abc.Sequence, types.GeneratorType)): + return value + + msg = f"Can't get PropsStream from value of type {type(value)}" + raise ValueError(msg) + + @staticmethod + def read_from(file_name): + with Path(file_name).open() as r: + file = yaml.safe_load(r) + file = sorted(file, key=lambda item: unix_time(item["valid_from"])) + yield from file + + +class Catalog(namedtuple("Catalog", ["entries"])): + """Implementation of the `YAML metadata validity specification `_.""" + + __slots__ = () + + class Entry(namedtuple("Entry", ["valid_from", "file"])): + __slots__ = () + + @staticmethod + def get(value): + if isinstance(value, Catalog): + return value + + if isinstance(value, str): + return Catalog.read_from(value) + + msg = f"Can't get Catalog from value of type {type(value)}" + raise ValueError(msg) + + @staticmethod + def 
read_from(file_name): + """Read from a valdiity YAML file and build a Catalog object""" + entries = {} + for props in PropsStream.get(file_name): + timestamp = props["valid_from"] + system = "all" if props.get("category") is None else props["category"] + file_key = props["apply"] + if system not in entries: + entries[system] = [] + mode = "append" if props.get("mode") is None else props["mode"] + mode = "reset" if len(entries[system]) == 0 else mode + if mode == "reset": + new = file_key + elif mode == "append": + new = entries[system][-1].file.copy() + file_key + elif mode == "remove": + new = entries[system][-1].file.copy() + for file in file_key: + new.remove(file) + elif mode == "replace": + new = entries[system][-1].file.copy() + if len(file_key) != 2: + msg = f"Invalid number of elements in replace mode: {len(file_key)}" + raise ValueError(msg) + new.remove(file_key[0]) + new += [file_key[1]] + + else: + msg = f"Unknown mode for {timestamp}" + raise ValueError(msg) + + if timestamp in [entry.valid_from for entry in entries[system]]: + msg = ( + f"Duplicate timestamp: {timestamp}, use reset mode instead with a single entry" + ) + raise ValueError(msg) + entries[system].append(Catalog.Entry(unix_time(timestamp), new)) + + for system in entries: + entries[system] = sorted(entries[system], key=lambda entry: entry.valid_from) + return Catalog(entries) + + def valid_for(self, timestamp, system="all", allow_none=False): + """Get the valid entries for a given timestamp and system""" + if system in self.entries: + valid_from = [entry.valid_from for entry in self.entries[system]] + pos = bisect.bisect_right(valid_from, unix_time(timestamp)) + if pos > 0: + return self.entries[system][pos - 1].file + + if system != "all": + return self.valid_for(timestamp, system="all", allow_none=allow_none) + + if allow_none: + return None + + msg = f"No valid entries found for timestamp: {timestamp}, system: {system}" + raise RuntimeError(msg) + + if system != "all": + return self.valid_for(timestamp, system="all", allow_none=allow_none) + + if allow_none: + return None + + msg = f"No entries found for system: {system}" + raise RuntimeError(msg) + + @staticmethod + def get_files(catalog_file, timestamp, category="all"): + """Helper function to get the files for a given timestamp and category""" + catalog = Catalog.read_from(catalog_file) + return Catalog.valid_for(catalog, timestamp, category) diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index 88720ae..2fc3525 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -6,20 +6,20 @@ import json import re import warnings -from typing import ClassVar import snakemake as smk +import yaml from .FileKey import FileKey, ProcessingFileKey from .patterns import par_validity_pattern class pars_key_resolve: - name_dict: ClassVar[dict] = {"cal": ["par_dsp", "par_hit"], "lar": ["par_dsp", "par_hit"]} def __init__(self, valid_from, category, apply): self.valid_from = valid_from self.category = category + self.mode = "reset" self.apply = apply def __str__(self): @@ -34,7 +34,7 @@ def from_filekey(cls, filekey, name_dict): filekey.timestamp, "all", filekey.get_path_from_filekey( - par_validity_pattern(), processing_step=name_dict, ext="json" + par_validity_pattern(), processing_step=name_dict, ext="yaml" ), ) @@ -44,6 +44,11 @@ def write_to_jsonl(file_names, path): for file_name in file_names: of.write(f"{file_name.get_json()}\n") + @staticmethod + def write_to_yaml(file_names, path): + with 
open(path, "w") as of: + yaml.dump([file_name.__dict__ for file_name in file_names], of, sort_keys=False) + @staticmethod def match_keys(key1, key2): if ( diff --git a/scripts/util/pars_loading.py b/scripts/util/pars_loading.py index 03f242e..7a9dd87 100644 --- a/scripts/util/pars_loading.py +++ b/scripts/util/pars_loading.py @@ -5,14 +5,14 @@ import os -from .CalibCatalog import CalibCatalog +from .catalog import Catalog from .FileKey import ProcessingFileKey # from .patterns import from .utils import get_pars_path, par_overwrite_path -class pars_catalog(CalibCatalog): +class pars_catalog(Catalog): @staticmethod def match_pars_files(filelist1, filelist2): for file2 in filelist2: @@ -29,9 +29,9 @@ def match_pars_files(filelist1, filelist2): @staticmethod def get_par_file(setup, timestamp, tier): - par_file = os.path.join(get_pars_path(setup, tier), "validity.jsonl") + par_file = os.path.join(get_pars_path(setup, tier), "validity.yaml") pars_files = pars_catalog.get_calib_files(par_file, timestamp) - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") + par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.yaml") pars_files_overwrite = pars_catalog.get_calib_files(par_overwrite_file, timestamp) if len(pars_files_overwrite) > 0: pars_files, pars_files_overwrite = pars_catalog.match_pars_files( diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index b60d73f..7f0b30c 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -5,29 +5,16 @@ import os from .utils import ( - par_dsp_path, - par_evt_path, - par_hit_path, + get_pars_path, + get_tier_path, par_overwrite_path, - par_pht_path, - par_psp_path, - par_raw_path, - par_tcm_path, pars_path, plts_path, sandbox_path, tier_daq_path, - tier_dsp_path, - tier_evt_path, - tier_hit_path, tier_path, - tier_pet_path, - tier_pht_path, - tier_psp_path, tier_raw_blind_path, - tier_raw_path, tier_skm_path, - tier_tcm_path, tmp_log_path, tmp_par_path, tmp_plts_path, @@ -87,16 +74,6 @@ def get_pattern_tier_daq(setup): ) -def get_pattern_tier_raw(setup): - return os.path.join( - f"{tier_raw_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_raw.lh5", - ) - - def get_pattern_tier_raw_blind(setup): return os.path.join( f"{tier_raw_blind_path(setup)}", @@ -107,303 +84,55 @@ def get_pattern_tier_raw_blind(setup): ) -def get_pattern_tier_tcm(setup): - return os.path.join( - f"{tier_tcm_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_tcm.lh5", - ) - - -def get_pattern_tier_dsp(setup): - return os.path.join( - f"{tier_dsp_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_dsp.lh5", - ) - - -def get_pattern_tier_hit(setup): - return os.path.join( - f"{tier_hit_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_hit.lh5", - ) - - -def get_pattern_tier_evt(setup): - return os.path.join( - f"{tier_evt_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_evt.lh5", - ) - - -def get_pattern_tier_evt_concat(setup): - return os.path.join( - f"{tier_evt_path(setup)}", - "{datatype}", - "{experiment}-{period}-{run}-{datatype}-tier_evt.lh5", - ) - - -def get_pattern_tier_psp(setup): - return os.path.join( - f"{tier_psp_path(setup)}", - "{datatype}", - "{period}", - "{run}", - 
"{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_psp.lh5", - ) - - -def get_pattern_tier_pht(setup): - return os.path.join( - f"{tier_pht_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pht.lh5", - ) - - -def get_pattern_tier_pet(setup): - return os.path.join( - f"{tier_pet_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_pet.lh5", - ) - - -def get_pattern_tier_pet_concat(setup): - return os.path.join( - f"{tier_pet_path(setup)}", - "{datatype}", - "{experiment}-{period}-{run}-{datatype}-tier_pet.lh5", - ) - - -def get_pattern_tier_skm(setup): - return os.path.join( - f"{tier_skm_path(setup)}", - "phy", - "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", - ) - - def get_pattern_tier(setup, tier, check_in_cycle=True): - if tier == "daq": - file_pattern = get_pattern_tier_daq(setup) - elif tier == "raw": - file_pattern = get_pattern_tier_raw(setup) - elif tier == "tcm": - file_pattern = get_pattern_tier_tcm(setup) - elif tier == "dsp": - file_pattern = get_pattern_tier_dsp(setup) - elif tier == "hit": - file_pattern = get_pattern_tier_hit(setup) - elif tier == "evt": - file_pattern = get_pattern_tier_evt(setup) - elif tier == "evt_concat": - file_pattern = get_pattern_tier_evt_concat(setup) - elif tier == "psp": - file_pattern = get_pattern_tier_psp(setup) - elif tier == "pht": - file_pattern = get_pattern_tier_pht(setup) - elif tier == "pet": - file_pattern = get_pattern_tier_pet(setup) - elif tier == "pet_concat": - file_pattern = get_pattern_tier_pet_concat(setup) - elif tier == "skm": - file_pattern = get_pattern_tier_skm(setup) - else: - msg = "invalid tier" - raise Exception(msg) - if tier_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: - return "/tmp/" + os.path.basename(file_pattern) - else: - return file_pattern - - -def get_pattern_par_raw(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_raw_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_raw_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_raw_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_raw" + f".{extension}", - ) - - -def get_pattern_par_tcm(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_tcm_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_tcm_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_tcm_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_tcm" + f".{extension}", - ) - - -def get_pattern_par_dsp(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_dsp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_dsp_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_dsp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_dsp" + f".{extension}", - ) - - -def get_pattern_par_hit(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_hit_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_hit_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_hit_path(setup)}", - "cal", - 
"{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_hit" + f".{extension}", - ) - - -def get_pattern_par_evt(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_evt_" + f"{name}.{extension}", - ) - else: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_evt" + f".{extension}", - ) - - -def get_pattern_par_psp(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_psp_path(setup)}", - "cal", + if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: + file_pattern = os.path.join( + get_tier_path(setup, tier), + "{datatype}", "{period}", "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_psp_" + f"{name}.{extension}", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5", ) - else: - return os.path.join( - f"{par_psp_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_psp" + f".{extension}", + elif tier in ["evt_concat", "pet_concat"]: + file_pattern = os.path.join( + get_tier_path(setup, tier[:3]), + "{datatype}", + "{experiment}-{period}-{run}-{datatype}-tier_" + f"{tier[:3]}.lh5", ) - -def get_pattern_par_pht(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_pht_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pht_" + f"{name}.{extension}", + elif tier == "skm": + file_pattern = os.path.join( + f"{tier_skm_path(setup)}", + "phy", + "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", ) else: - return os.path.join( - f"{par_pht_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pht" + f".{extension}", - ) - - -def get_pattern_par_pet(setup, name=None, extension="json"): - if name is not None: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pet_" + f"{name}.{extension}", - ) + msg = "invalid tier" + raise Exception(msg) + if tier_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: + return "/tmp/" + os.path.basename(file_pattern) else: - return os.path.join( - f"{par_evt_path(setup)}", - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_pet" + f".{extension}", - ) + return file_pattern -def get_pattern_pars(setup, tier, name=None, extension="json", check_in_cycle=True): - if tier == "raw": - file_pattern = get_pattern_par_raw(setup, name, extension) - elif tier == "tcm": - file_pattern = get_pattern_par_tcm(setup, name, extension) - elif tier == "dsp": - file_pattern = get_pattern_par_dsp(setup, name, extension) - elif tier == "hit": - file_pattern = get_pattern_par_hit(setup, name, extension) - elif tier == "evt": - file_pattern = get_pattern_par_evt(setup, name, extension) - elif tier == "psp": - file_pattern = get_pattern_par_psp(setup, name, extension) - elif tier == "pht": - file_pattern = get_pattern_par_pht(setup, name, extension) - elif tier == "pet": - file_pattern = get_pattern_par_pet(setup, name, extension) +def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=True): + if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: + if name is not None: + return os.path.join( + 
get_pars_path(setup, tier), + "cal", + "{period}", + "{run}", + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}_{name}.{extension}", + ) + else: + file_pattern = os.path.join( + get_pars_path(setup, tier), + "cal", + "{period}", + "{run}", + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}", + ) else: msg = "invalid tier" raise Exception(msg) @@ -419,7 +148,7 @@ def get_pattern_pars(setup, tier, name=None, extension="json", check_in_cycle=Tr return file_pattern -def get_pattern_pars_svm(setup, tier, name=None, ext="json"): +def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): if name is not None: return os.path.join( f"{par_overwrite_path(setup)}", @@ -440,7 +169,7 @@ def get_pattern_pars_svm(setup, tier, name=None, ext="json"): ) -def get_pattern_pars_overwrite(setup, tier, name=None): +def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): if name is not None: return os.path.join( f"{par_overwrite_path(setup)}", @@ -449,10 +178,7 @@ def get_pattern_pars_overwrite(setup, tier, name=None): "{period}", "{run}", "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - + tier - + "_" - + name - + "-overwrite.json", + + f"{tier}_{name}-overwrite.{extension}", ) else: return os.path.join( @@ -461,32 +187,34 @@ def get_pattern_pars_overwrite(setup, tier, name=None): "{datatype}", "{period}", "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + tier + "-overwrite.json", + "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + + tier + + f"-overwrite.{extension}", ) -def get_pattern_pars_tmp(setup, tier, name=None, datatype=None): +def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml"): if datatype is None: datatype = "{datatype}" if name is None: return os.path.join( f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + tier + ".json", + "{experiment}-{period}-{run}-" + + datatype + + "-{timestamp}-par_" + + f"{tier}.{extension}", ) else: return os.path.join( f"{tmp_par_path(setup)}", "{experiment}-{period}-{run}-" + datatype - + "-{timestamp}-par_" - + tier - + "_" - + name - + ".json", + + "-{timestamp}" + + f"par_{tier}_{name}.{extension}", ) -def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="json"): +def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: return os.path.join( f"{tmp_par_path(setup)}", @@ -509,11 +237,7 @@ def get_pattern_plts_tmp_channel(setup, tier, name=None): else: return os.path.join( f"{tmp_plts_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" - + tier - + "_" - + name - + ".pkl", + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl", ) @@ -538,19 +262,6 @@ def get_pattern_plts(setup, tier, name=None): ) -def get_energy_grids_pattern_combine(setup): - return os.path.join( - f"{tmp_par_path(setup)}", - "dsp", - "cal", - "{{period}}", - "{{run}}", - "par_dsp_energy_grid", - "{{channel}}", - "{{experiment}}-{{period}}-{{run}}-cal-{{timestamp}}-{{channel}}-{peak}-par_dsp_energy_grid.pkl", - ) - - def get_pattern_log(setup, processing_step): return os.path.join( f"{tmp_log_path(setup)}", @@ -559,17 +270,17 @@ def get_pattern_log(setup, processing_step): ) -def get_pattern_log_concat(setup, processing_step): +def get_pattern_log_channel(setup, processing_step): return os.path.join( f"{tmp_log_path(setup)}", processing_step, - "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", + 
"{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log", ) -def get_pattern_log_channel(setup, processing_step): +def get_pattern_log_concat(setup, processing_step): return os.path.join( f"{tmp_log_path(setup)}", processing_step, - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log", + "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", ) diff --git a/scripts/util/utils.py b/scripts/util/utils.py index 894d69e..2cb53ef 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -40,135 +40,51 @@ def tier_path(setup): return setup["paths"]["tier"] -def tier_tcm_path(setup): - return setup["paths"]["tier_tcm"] - - -def tier_raw_path(setup): - return setup["paths"]["tier_raw"] - - -def tier_dsp_path(setup): - return setup["paths"]["tier_dsp"] - - -def tier_hit_path(setup): - return setup["paths"]["tier_hit"] - - -def tier_evt_path(setup): - return setup["paths"]["tier_evt"] - - -def tier_psp_path(setup): - return setup["paths"]["tier_psp"] - - -def tier_pht_path(setup): - return setup["paths"]["tier_pht"] - - -def tier_pet_path(setup): - return setup["paths"]["tier_pet"] - - -def tier_skm_path(setup): - return setup["paths"]["tier_skm"] - - def get_tier_path(setup, tier): if tier == "raw": - return tier_raw_path(setup) + return setup["paths"]["tier_raw"] elif tier == "tcm": - return tier_tcm_path(setup) + return setup["paths"]["tier_tcm"] elif tier == "dsp": - return tier_dsp_path(setup) + return setup["paths"]["tier_dsp"] elif tier == "hit": - return tier_hit_path(setup) + return setup["paths"]["tier_hit"] elif tier == "evt": - return tier_evt_path(setup) + return setup["paths"]["tier_evt"] elif tier == "psp": - return tier_psp_path(setup) + return setup["paths"]["tier_psp"] elif tier == "pht": - return tier_pht_path(setup) + return setup["paths"]["tier_pht"] elif tier == "pet": - return tier_pet_path(setup) + return setup["paths"]["tier_pet"] elif tier == "skm": - return tier_skm_path(setup) + return setup["paths"]["tier_skm"] else: msg = f"no tier matching:{tier}" raise ValueError(msg) -def config_path(setup): - return setup["paths"]["config"] - - -def chan_map_path(setup): - return setup["paths"]["chan_map"] - - -def metadata_path(setup): - return setup["paths"]["metadata"] - - -def detector_db_path(setup): - return setup["paths"]["detector_db"] - - -def par_raw_path(setup): - return setup["paths"]["par_raw"] - - -def par_tcm_path(setup): - return setup["paths"]["par_tcm"] - - -def par_dsp_path(setup): - return setup["paths"]["par_dsp"] - - -def par_hit_path(setup): - return setup["paths"]["par_hit"] - - -def par_evt_path(setup): - return setup["paths"]["par_evt"] - - -def par_psp_path(setup): - return setup["paths"]["par_psp"] - - -def par_pht_path(setup): - return setup["paths"]["par_pht"] - - -def par_pet_path(setup): - return setup["paths"]["par_pet"] - - def pars_path(setup): return setup["paths"]["par"] def get_pars_path(setup, tier): if tier == "raw": - return par_raw_path(setup) + return setup["paths"]["par_raw"] elif tier == "tcm": - return par_tcm_path(setup) + return setup["paths"]["par_tcm"] elif tier == "dsp": - return par_dsp_path(setup) + return setup["paths"]["par_dsp"] elif tier == "hit": - return par_hit_path(setup) + return setup["paths"]["par_hit"] elif tier == "evt": - return par_evt_path(setup) + return setup["paths"]["par_evt"] elif tier == "psp": - return par_psp_path(setup) + return setup["paths"]["par_psp"] elif tier == "pht": - return par_pht_path(setup) + return 
setup["paths"]["par_pht"] elif tier == "pet": - return par_pet_path(setup) + return setup["paths"]["par_pet"] else: msg = f"no tier matching:{tier}" raise ValueError(msg) @@ -190,6 +106,22 @@ def par_overwrite_path(setup): return setup["paths"]["par_overwrite"] +def config_path(setup): + return setup["paths"]["config"] + + +def chan_map_path(setup): + return setup["paths"]["chan_map"] + + +def metadata_path(setup): + return setup["paths"]["metadata"] + + +def detector_db_path(setup): + return setup["paths"]["detector_db"] + + def log_path(setup): return setup["paths"]["log"] From 41c326bca6b596a78c9da886ad76a123c3d1e507 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 15:22:10 +0100 Subject: [PATCH 007/101] update rules for pattern changes --- Snakefile | 2 +- rules/blinding_calibration.smk | 2 +- rules/blinding_check.smk | 2 +- rules/common.smk | 4 ++-- rules/dsp.smk | 5 +---- rules/evt.smk | 10 +++++----- rules/hit.smk | 5 ++--- rules/pht.smk | 1 - rules/pht_fast.smk | 1 - rules/psp.smk | 2 +- rules/raw.smk | 4 +++- rules/tcm.smk | 3 +-- 12 files changed, 18 insertions(+), 23 deletions(-) diff --git a/Snakefile b/Snakefile index 017f0b1..b2daaa2 100644 --- a/Snakefile +++ b/Snakefile @@ -44,7 +44,7 @@ configs = config_path(setup) chan_maps = chan_map_path(setup) meta = metadata_path(setup) swenv = runcmd(setup) -part = ds.dataset_file(setup, os.path.join(configs, "partitions.json")) +part = ds.cal_grouping(setup, os.path.join(configs, "partitions.json")) basedir = workflow.basedir diff --git a/rules/blinding_calibration.smk b/rules/blinding_calibration.smk index ef0a11e..bcf0d64 100644 --- a/rules/blinding_calibration.smk +++ b/rules/blinding_calibration.smk @@ -5,7 +5,7 @@ Snakemake rules for calibrating daq energy for blinding. Two steps: """ from scripts.util.patterns import ( - get_pattern_par_raw, + get_pattern_pars, get_pattern_plts, get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, diff --git a/rules/blinding_check.smk b/rules/blinding_check.smk index 653eb3f..ac7240c 100644 --- a/rules/blinding_check.smk +++ b/rules/blinding_check.smk @@ -8,7 +8,7 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_raw, + get_pattern_pars, get_pattern_plts, get_pattern_pars, ) diff --git a/rules/common.smk b/rules/common.smk index c74f514..b985044 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -8,7 +8,7 @@ from scripts.util.patterns import ( par_raw_path, get_pattern_unsorted_data, get_pattern_tier_daq, - get_pattern_tier_raw, + get_pattern_tier, get_pattern_plts_tmp_channel, ) from scripts.util import ProcessingFileKey @@ -114,4 +114,4 @@ def get_tier_pattern(tier): elif tier == "raw": return get_pattern_tier_daq(setup) else: - return get_pattern_tier_raw(setup) + return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/dsp.smk b/rules/dsp.smk index 661a990..f8ea4a3 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -13,10 +13,7 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_dsp, get_pattern_plts, - get_pattern_tier_raw, - get_pattern_tier_tcm, get_pattern_tier, get_pattern_pars_tmp, get_pattern_log, @@ -386,7 +383,7 @@ rule build_pars_dsp: rule build_dsp: input: - raw_file=get_pattern_tier_raw(setup), + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), pars_file=ancient( lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, 
"dsp" diff --git a/rules/evt.smk b/rules/evt.smk index d51ad39..c760b54 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -21,16 +21,16 @@ for tier in ("evt", "pet"): rule: input: dsp_file=( - get_pattern_tier_dsp(setup) + get_pattern_tier(setup, "dsp", check_in_cycle=False) if tier == "evt" - else get_pattern_tier_psp(setup) + else get_pattern_tier(setup, "psp", check_in_cycle=False) ), hit_file=( - get_pattern_tier_hit(setup) + get_pattern_tier(setup, "hit", check_in_cycle=False) if tier == "evt" - else get_pattern_tier_pht(setup) + else get_pattern_tier(setup, "pht", check_in_cycle=False) ), - tcm_file=get_pattern_tier_tcm(setup), + tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), xtalk_matrix=lambda wildcards: get_svm_file( tier=tier, wildcards=wildcards, name="xtc" ), diff --git a/rules/hit.smk b/rules/hit.smk index fac37a1..f1bb0ba 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -11,9 +11,8 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_hit, + get_pattern_pars, get_pattern_plts, - get_pattern_tier_dsp, get_pattern_tier, get_pattern_pars_tmp, get_pattern_log, @@ -297,7 +296,7 @@ rule build_pars_hit: rule build_hit: input: - dsp_file=get_pattern_tier_dsp(setup), + dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), pars_file=lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, "hit" ), diff --git a/rules/pht.smk b/rules/pht.smk index 86646fa..76542a3 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -13,7 +13,6 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index 925d42c..5672011 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -5,7 +5,6 @@ from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, diff --git a/rules/psp.smk b/rules/psp.smk index 9a3e4af..a959cf4 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -337,7 +337,7 @@ rule build_pars_psp: rule build_psp: input: - raw_file=get_pattern_tier_raw(setup), + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), pars_file=ancient( lambda wildcards: pars_catalog.get_par_file( setup, wildcards.timestamp, "psp" diff --git a/rules/raw.smk b/rules/raw.smk index 20d1105..a81520a 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -43,7 +43,9 @@ rule build_raw_blind: and runs only if the blinding check file is on disk. Output is just the blinded raw file. 
""" input: - tier_file=get_pattern_tier_raw(setup).replace("{datatype}", "phy"), + tier_file=get_pattern_tier(setup, "raw", check_in_cycle=False).replace( + "{datatype}", "phy" + ), blind_file=get_blinding_curve_file, params: timestamp="{timestamp}", diff --git a/rules/tcm.smk b/rules/tcm.smk index 657cda3..c1164bb 100644 --- a/rules/tcm.smk +++ b/rules/tcm.smk @@ -3,7 +3,6 @@ Snakemake file containing the rules for generating the tcm """ from scripts.util.patterns import ( - get_pattern_tier_raw, get_pattern_tier, get_pattern_log, get_pattern_pars_tmp_channel, @@ -14,7 +13,7 @@ from scripts.util.patterns import ( # This rule builds the tcm files each raw file rule build_tier_tcm: input: - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), params: timestamp="{timestamp}", datatype="{datatype}", From 1698eb1561a8a49d9fd154688f3e01cda8c2cdee Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 15:30:19 +0100 Subject: [PATCH 008/101] add debug mode functionality --- scripts/pars_hit_aoe.py | 4 ++++ scripts/pars_hit_ecal.py | 4 ++++ scripts/pars_hit_lq.py | 4 ++++ scripts/pars_pht_aoecal.py | 4 ++++ scripts/pars_pht_fast.py | 2 ++ scripts/pars_pht_lqcal.py | 4 ++++ scripts/pars_pht_partcal.py | 8 +++++++- 7 files changed, 29 insertions(+), 1 deletion(-) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index ed33f23..be40ed5 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -66,6 +66,7 @@ def aoe_calibration( dt_param: str = "dt_eff", comptBands_width: int = 20, plot_options: dict | None = None, + debug_mode: bool = False, ): data["AoE_Uncorr"] = data[current_param] / data[energy_param] aoe = CalAoE( @@ -82,6 +83,7 @@ def aoe_calibration( mean_func=mean_func, sigma_func=sigma_func, compt_bands_width=comptBands_width, + debug_mode=debug_mode | args.debug, ) aoe.update_cal_dicts( @@ -116,6 +118,8 @@ def aoe_calibration( argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--aoe_results", help="aoe_results", type=str) + +argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index d19b427..f7b8be3 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -439,6 +439,8 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) argparser.add_argument("--save_path", help="save_path", type=str) argparser.add_argument("--results_path", help="results_path", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -565,6 +567,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): glines, guess, kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False) | args.debug, ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, etol_kev=5 if det_status == "on" else 20 @@ -575,6 +578,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): glines, guess, kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False), ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( e_uncal, 
etol_kev=5 if det_status == "on" else 30, n_sigma=2 diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 5a0ad96..da83623 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -54,6 +54,7 @@ def lq_calibration( cdf: callable = gaussian, selection_string: str = "", plot_options: dict | None = None, + debug_mode: bool = False, ): """Loads in data from the provided files and runs the LQ calibration on said files @@ -99,6 +100,7 @@ def lq_calibration( eres_func, cdf, selection_string, + debug_mode=debug_mode | args.debug, ) data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) @@ -136,6 +138,8 @@ def lq_calibration( argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--lq_results", help="lq_results", type=str) + +argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index bf91d38..8fb2b36 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -92,6 +92,7 @@ def aoe_calibration( dt_param: str = "dt_eff", comptBands_width: int = 20, plot_options: dict | None = None, + debug_mode: bool = False, ): data["AoE_Uncorr"] = data[current_param] / data[energy_param] aoe = CalAoE( @@ -108,6 +109,7 @@ def aoe_calibration( mean_func=mean_func, sigma_func=sigma_func, compt_bands_width=comptBands_width, + debug_mode=debug_mode | args.debug, ) aoe.update_cal_dicts( { @@ -263,6 +265,8 @@ def eres_func(x): argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--aoe_results", help="aoe_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 8210df7..6ab1a4b 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -66,6 +66,8 @@ def run_splitter(files): argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 3d5915e..890554f 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -75,6 +75,7 @@ def lq_calibration( cdf: callable = gaussian, selection_string: str = "", plot_options: dict | None = None, + debug_mode: bool = False, ): """Loads in data from the provided files and runs the LQ calibration on said files @@ -119,6 +120,7 @@ def lq_calibration( eres_func, cdf, selection_string, + debug_mode=debug_mode | args.debug, ) data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) @@ -259,6 +261,8 @@ def eres_func(x): argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) 
argparser.add_argument("--lq_results", help="lq_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 0d74ac8..b6f12d7 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -218,7 +218,11 @@ def calibrate_partition( for energy_param, cal_energy_param in zip(kwarg_dict["energy_params"], cal_energy_params): energy = data.query(selection_string)[energy_param].to_numpy() full_object_dict[cal_energy_param] = HPGeCalibration( - energy_param, glines, 1, kwarg_dict.get("deg", 0) # , fixed={1: 1} + energy_param, + glines, + 1, + kwarg_dict.get("deg", 0), + debug_mode=kwarg_dict.get("debug_mode", False) | args.debug, # , fixed={1: 1} ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( energy, @@ -426,6 +430,8 @@ def calibrate_partition( argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") From b8404444ee8fab5fbac4f871f6c8f535906c82d3 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 27 Nov 2024 18:02:08 +0100 Subject: [PATCH 009/101] os to pathlib.Path --- .ruff.toml | 2 +- scripts/blinding_calibration.py | 3 +- scripts/build_dsp.py | 13 +- scripts/build_evt.py | 5 +- scripts/build_hit.py | 11 +- scripts/build_raw.py | 10 +- scripts/build_raw_blind.py | 9 +- scripts/build_skm.py | 5 +- scripts/build_tcm.py | 7 +- scripts/check_blinding.py | 9 +- scripts/complete_run.py | 49 +++--- scripts/create_chankeylist.py | 7 +- scripts/merge_channels.py | 35 ++-- scripts/par_psp.py | 18 +- scripts/pars_dsp_build_svm.py | 3 +- scripts/pars_dsp_dplms.py | 17 +- scripts/pars_dsp_eopt.py | 17 +- scripts/pars_dsp_event_selection.py | 15 +- scripts/pars_dsp_nopt.py | 13 +- scripts/pars_dsp_svm.py | 9 +- scripts/pars_dsp_tau.py | 13 +- scripts/pars_hit_aoe.py | 21 ++- scripts/pars_hit_ecal.py | 21 +-- scripts/pars_hit_lq.py | 21 ++- scripts/pars_hit_qc.py | 11 +- scripts/pars_pht_aoecal.py | 37 ++-- scripts/pars_pht_fast.py | 39 ++-- scripts/pars_pht_lqcal.py | 41 +++-- scripts/pars_pht_partcal.py | 35 ++-- scripts/pars_pht_qc.py | 19 +- scripts/pars_pht_qc_phy.py | 13 +- scripts/pars_tcm_pulser.py | 7 +- scripts/util/FileKey.py | 6 +- scripts/util/cal_grouping.py | 25 ++- scripts/util/catalog.py | 2 +- scripts/util/create_pars_keylist.py | 8 +- scripts/util/pars_loading.py | 11 +- scripts/util/patterns.py | 264 +++++++++++++++------------- scripts/util/utils.py | 6 +- tests/test_util.py | 19 +- 40 files changed, 431 insertions(+), 445 deletions(-) diff --git a/.ruff.toml b/.ruff.toml index 29f8014..8b4d420 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -12,7 +12,7 @@ lint.select = [ "PIE", # flake8-pie "PL", # pylint "PT", # flake8-pytest-style - # "PTH", # flake8-use-pathlib + "PTH", # flake8-use-pathlib "RET", # flake8-return "RUF", # Ruff-specific "SIM", # flake8-simplify diff --git a/scripts/blinding_calibration.py b/scripts/blinding_calibration.py index 6a1b0a7..62207e9 100644 --- a/scripts/blinding_calibration.py +++ b/scripts/blinding_calibration.py @@ -7,6 +7,7 
@@ import argparse import logging import pickle as pkl +from pathlib import Path import matplotlib as mpl import matplotlib.pyplot as plt @@ -93,7 +94,7 @@ ax2.set_xlabel("energy (keV)") ax2.set_ylabel("counts") plt.suptitle(args.channel) -with open(args.plot_file, "wb") as w: +with Path(args.plot_file).open("wb") as w: pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) plt.close() diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index cbd0794..02bf6a1 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -1,6 +1,5 @@ import argparse import logging -import os import pathlib import re import time @@ -37,7 +36,7 @@ def replace_list_with_array(dic): argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) +pathlib.Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) @@ -51,13 +50,13 @@ def replace_list_with_array(dic): channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} db_files = [ - par_file for par_file in args.pars_file if os.path.splitext(par_file)[1] in (".json", ".yaml") + par_file for par_file in args.pars_file if pathlib.Path(par_file).suffix in (".json", ".yaml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) database_dic = replace_list_with_array(database_dic) -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +pathlib.Path(args.output).parent.mkdir(parents=True, exist_ok=True) rng = np.random.default_rng() rand_num = f"{rng.integers(0, 99999):05d}" @@ -78,9 +77,9 @@ def replace_list_with_array(dic): log.info(f"build_dsp finished in {time.time()-start}") -os.rename(temp_output, args.output) +pathlib.Path(temp_output).rename(args.output) -key = os.path.basename(args.output).replace("-tier_dsp.lh5", "") +key = pathlib.Path(args.output).name.replace("-tier_dsp.lh5", "") raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] @@ -109,5 +108,5 @@ def replace_list_with_array(dic): }, "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, } -pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) +pathlib.Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.db_file, full_dict) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 3d993d8..6927c24 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -1,7 +1,6 @@ import argparse import json import logging -import os import time from pathlib import Path @@ -51,7 +50,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): args = argparser.parse_args() if args.log is not None: - Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") else: logging.basicConfig(level=logging.DEBUG) @@ -118,7 +117,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): log.debug(json.dumps(evt_config["channels"], indent=2)) t_start = time.time() -Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) table = build_evt( { diff --git a/scripts/build_hit.py b/scripts/build_hit.py index c550337..8e2da80 100644 --- 
a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -1,8 +1,7 @@ import argparse import logging -import os -import pathlib import time +from pathlib import Path from legendmeta import TextDB from legendmeta.catalog import Props @@ -24,7 +23,7 @@ argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) @@ -59,7 +58,7 @@ hit_dict[f"{channel}/dsp"] = chan_pars t_start = time.time() -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) build_hit(args.input, lh5_tables_config=hit_dict, outfile=args.output) t_elap = time.time() - t_start log.info(f"Done! Time elapsed: {t_elap:.2f} sec.") @@ -80,12 +79,12 @@ } hit_channels.append(channel) -key = os.path.basename(args.output).replace(f"-tier_{args.tier}.lh5", "") +key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") full_dict = { "valid_fields": {args.tier: hit_outputs}, "valid_keys": {key: {"valid_channels": {args.tier: hit_channels}}}, } -pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) +Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.db_file, full_dict) diff --git a/scripts/build_raw.py index c02b67b..03a4fca 100644 --- a/scripts/build_raw.py +++ b/scripts/build_raw.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import numpy as np from daq2lh5 import build_raw @@ -18,10 +17,10 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"][ @@ -83,4 +82,5 @@ build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) -os.rename(temp_output, args.output) +# rename the temp file +Path(temp_output).rename(args.output) diff --git a/scripts/build_raw_blind.py index 0400f22..33a6c31 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -12,8 +12,7 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import numexpr as ne import numpy as np @@ -35,11 +34,11 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") logging.getLogger("lgdo").setLevel(logging.INFO) -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) channel_dict = configs.on(args.timestamp, system=args.datatype) @@ -167,4 +166,4 @@ ) # rename the temp file -os.rename(temp_output,
args.output) +Path(temp_output).rename(args.output) diff --git a/scripts/build_skm.py index a327caa..10bf876 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import awkward as ak from legendmeta import TextDB @@ -32,7 +31,7 @@ def get_all_out_fields(input_table, out_fields, current_field=""): args = argparser.parse_args() if args.log is not None: - pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") diff --git a/scripts/build_tcm.py index c39faea..2ceb3ab 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -21,7 +20,7 @@ logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) channel_dict = configs["snakemake_rules"]["tier_tcm"]["inputs"] @@ -50,4 +49,4 @@ **settings, ) -os.rename(temp_output, args.output) +Path(temp_output).rename(args.output) diff --git a/scripts/check_blinding.py index 4d8a6fa..7d6da04 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -8,9 +8,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl +from pathlib import Path import matplotlib as mpl import matplotlib.pyplot as plt @@ -40,7 +39,7 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -os.makedirs(os.path.dirname(args.log), exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) @@ -85,7 +84,7 @@ ax2.set_xlabel("energy (keV)") ax2.set_ylabel("counts") plt.suptitle(args.channel) -with open(args.plot_file, "wb") as w: +with Path(args.plot_file).open("wb") as w: pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) plt.close() @@ -93,7 +92,7 @@ # valid and if so create file else raise error.
if detector is in ac mode it # will always pass this check if np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) or det_status is False: - pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) + Path(args.output).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output, {}) else: msg = "peaks not found in daqenergy" diff --git a/scripts/complete_run.py index f61ba37..fe800e8 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -1,7 +1,6 @@ # ruff: noqa: F821, T201 import datetime -import glob import json import os import time @@ -20,14 +19,14 @@ def as_ro(path): def check_log_files(log_path, output_file, gen_output, warning_file=None): now = datetime.datetime.now(datetime.UTC).strftime("%d/%m/%y %H:%M") - os.makedirs(os.path.dirname(output_file), exist_ok=True) + Path(output_file).parent.mkdir(parents=True, exist_ok=True) if warning_file is not None: - os.makedirs(os.path.dirname(warning_file), exist_ok=True) - with open(warning_file, "w") as w, open(output_file, "w") as f: + Path(warning_file).parent.mkdir(parents=True, exist_ok=True) + with Path(warning_file).open("w") as w, Path(output_file).open("w") as f: n_errors = 0 n_warnings = 0 for file in Path(log_path).rglob("*.log"): - with open(file) as r: + with Path(file).open() as r: text = r.read() if "ERROR" in text or "WARNING" in text: for line in text.splitlines(): @@ -40,24 +39,24 @@ def check_log_files(log_path, output_file, gen_output, warning_file=None): w.write( f"{gen_output} successfully generated at {now} with warnings \n" ) - f.write(f"{os.path.basename(file)} : {line}\n") + f.write(f"{Path(file).name} : {line}\n") n_errors += 1 elif "WARNING" in line: - w.write(f"{os.path.basename(file)} : {line}\n") + w.write(f"{Path(file).name} : {line}\n") n_warnings += 1 else: pass - os.remove(file) + Path(file).unlink() text = None if n_errors == 0: f.write(f"{gen_output} successfully generated at {now} with no errors \n") if n_warnings == 0: w.write(f"{gen_output} successfully generated at {now} with no warnings \n") else: - with open(output_file, "w") as f: + with Path(output_file).open("w") as f: n_errors = 0 for file in Path(log_path).rglob("*.log"): - with open(file) as r: + with Path(file).open() as r: text = r.read() if "ERROR" in text: for line in text.splitlines(): @@ -66,18 +65,18 @@ def check_log_files(log_path, output_file, gen_output, warning_file=None): f.write( f"{gen_output} successfully generated at {now} with errors \n" ) - f.write(f"{os.path.basename(file)} : {line}\n") + f.write(f"{Path(file).name} : {line}\n") n_errors += 1 else: pass - os.remove(file) + Path(file).unlink() text = None if n_errors == 0: f.write(f"{gen_output} successfully generated at {now} with no errors \n") walk = list(os.walk(log_path)) for path, _, _ in walk[::-1]: if len(os.listdir(path)) == 0: - os.rmdir(path) + Path(path).rmdir() def add_spaces(n): @@ -124,7 +123,7 @@ def get_run(Filekey): key_dict = {} for file in files: - key = FileKey.get_filekey_from_filename(os.path.basename(file)) + key = FileKey.get_filekey_from_filename(Path(file).name) if get_run(key) in key_dict: key_dict[get_run(key)].append(file) else: @@ -133,24 +132,24 @@ def get_run(Filekey): def build_valid_keys(input_files, output_dir): - infiles = glob.glob(as_ro(input_files)) + infiles = list(Path(as_ro(input_files)).parent.glob(Path(as_ro(input_files)).name)) key_dict = get_keys(infiles) for key in list(key_dict): dtype = key.split("-")[-1] - out_file = os.path.join(output_dir, f'{key.replace(f"-{dtype}",
"")}-valid_{dtype}.json') - Path(os.path.dirname(out_file)).mkdir(parents=True, exist_ok=True) - if os.path.isfile(out_file): + out_file = Path(output_dir) / f'{key.replace(f"-{dtype}", "")}-valid_{dtype}.json' + out_file.parent.mkdir(parents=True, exist_ok=True) + if Path(out_file).is_file(): out_dict = Props.read_from([out_file] + key_dict[key]) else: out_dict = Props.read_from(key_dict[key]) out_string = readable_json(out_dict) - with open(out_file, "w") as w: + with Path(out_file).open("w") as w: w.write(out_string) for input_file in infiles: - if os.path.isfile(input_file): - os.remove(input_file) + if Path(input_file).is_file(): + Path(input_file).unlink() def find_gen_runs(gen_tier_path): @@ -268,16 +267,16 @@ def fformat(tier): if snakemake.wildcards.tier != "daq": print(f"INFO: ...building FileDBs with {snakemake.threads} threads") - os.makedirs(snakemake.params.filedb_path, exist_ok=True) + Path(snakemake.params.filedb_path).mkdir(parents=True, exist_ok=True) - with open(os.path.join(snakemake.params.filedb_path, "file_db_config.json"), "w") as f: + with (Path(snakemake.params.filedb_path) / "file_db_config.json").open("w") as f: json.dump(file_db_config, f, indent=2) build_file_dbs(ut.tier_path(snakemake.params.setup), snakemake.params.filedb_path) - os.remove(os.path.join(snakemake.params.filedb_path, "file_db_config.json")) + (Path(snakemake.params.filedb_path) / "file_db_config.json").unlink() build_valid_keys( - os.path.join(ut.tmp_par_path(snakemake.params.setup), "*_db.json"), + Path(ut.tmp_par_path(snakemake.params.setup)) / "*_db.json", snakemake.params.valid_keys_path, ) diff --git a/scripts/create_chankeylist.py index 435f55c..6ed4510 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -1,6 +1,5 @@ import argparse -import os -import pathlib +from pathlib import Path from legendmeta import LegendMetadata, TextDB @@ -25,7 +24,7 @@ if status_map[chan]["processable"] is True and chmap[chan].system == "geds" ] -pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) -with open(args.output_file, "w") as f: +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) +with Path(args.output_file).open("w") as f: for chan in channels: f.write(f"{chan}\n") diff --git a/scripts/merge_channels.py index a86d47d..e8994be 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -1,8 +1,7 @@ import argparse -import os -import pathlib import pickle as pkl import shelve +from pathlib import Path import numpy as np from legendmeta.catalog import Props @@ -19,7 +18,7 @@ def replace_path(d, old_path, new_path): d[i] = replace_path(d[i], old_path, new_path) elif isinstance(d, str) and old_path in d: d = d.replace(old_path, new_path) - d = d.replace(new_path, f"$_/{os.path.basename(new_path)}") + d = d.replace(new_path, f"$_/{Path(new_path).name}") return d @@ -45,25 +44,25 @@ def replace_path(d, old_path, new_path): channel_files = args.input.infiles if hasattr(args.input, "infiles") else args.input -file_extension = pathlib.Path(args.output).suffix +file_extension = Path(args.output).suffix if file_extension == ".dat" or file_extension == ".dir": - out_file = os.path.splitext(args.output)[0] + out_file = Path(args.output).with_suffix("") else: out_file = args.output rng = np.random.default_rng() temp_output = f"{out_file}.{rng.integers(0, 99999):05d}" -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True)
+Path(args.output).parent.mkdir(parents=True, exist_ok=True) if file_extension == ".json" or file_extension == ".yaml" or file_extension == ".yml": out_dict = {} for channel in channel_files: - if pathlib.Path(channel).suffix == file_extension: + if Path(channel).suffix == file_extension: channel_dict = Props.read_from(channel) - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel out_dict[channel_name] = channel_dict else: @@ -72,29 +71,29 @@ def replace_path(d, old_path, new_path): Props.write_to(temp_output, out_dict, "json") - os.rename(temp_output, out_file) + Path(temp_output).rename(out_file) elif file_extension == ".pkl": out_dict = {} for channel in channel_files: - with open(channel, "rb") as r: + with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel out_dict[channel_name] = channel_dict - with open(temp_output, "wb") as w: + with Path(temp_output).open("wb") as w: pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - os.rename(temp_output, out_file) + Path(temp_output).rename(out_file) elif file_extension == ".dat" or file_extension == ".dir": common_dict = {} with shelve.open(out_file, "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: for channel in channel_files: - with open(channel, "rb") as r: + with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel if isinstance(channel_dict, dict) and "common" in list(channel_dict): chan_common_dict = channel_dict.pop("common") @@ -108,8 +107,8 @@ def replace_path(d, old_path, new_path): if args.in_db: db_dict = Props.read_from(args.in_db) for channel in channel_files: - if pathlib.Path(channel).suffix == file_extension: - fkey = ChannelProcKey.get_filekey_from_pattern(os.path.basename(channel)) + if Path(channel).suffix == file_extension: + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) channel_name = fkey.channel tb_in = lh5.read(f"{channel_name}", channel) @@ -128,4 +127,4 @@ def replace_path(d, old_path, new_path): if args.out_db: Props.write_to(args.out_db, db_dict) - os.rename(temp_output, out_file) + Path(temp_output).rename(out_file) diff --git a/scripts/par_psp.py b/scripts/par_psp.py index 52c2ed6..94473a0 100644 --- a/scripts/par_psp.py +++ b/scripts/par_psp.py @@ -1,7 +1,7 @@ import argparse -import os import pickle as pkl from datetime import datetime +from pathlib import Path import matplotlib as mpl import matplotlib.dates as mdates @@ -44,7 +44,7 @@ # partitions could be different for different channels - do separately for each channel in_dicts = {} for file in args.input: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp in_dicts[tstamp] = Props.read_from(file) plot_dict = {} @@ -109,36 +109,36 @@ plt.close() for file in args.output: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp Props.write_to(file, in_dicts[tstamp]) if args.out_plots: for file in args.out_plots: - tstamp = 
ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp if args.in_plots: for infile in args.in_plots: if tstamp in infile: - with open(infile, "rb") as f: + with Path(infile).open("rb") as f: old_plot_dict = pkl.load(f) break old_plot_dict.update({"psp": plot_dict}) new_plot_dict = old_plot_dict else: new_plot_dict = {"psp": plot_dict} - with open(file, "wb") as f: + with Path(file).open("wb") as f: pkl.dump(new_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) if args.out_obj: for file in args.out_obj: - tstamp = ChannelProcKey.get_filekey_from_pattern(os.path.basename(file)).timestamp + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp if args.in_obj: for infile in args.in_obj: if tstamp in infile: - with open(infile, "rb") as f: + with Path(infile).open("rb") as f: old_obj_dict = pkl.load(f) break new_obj_dict = old_obj_dict else: new_obj_dict = {} - with open(file, "wb") as f: + with Path(file).open("wb") as f: pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index df97320..0d6ada7 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -1,6 +1,7 @@ import argparse import logging import pickle as pkl +from pathlib import Path from legendmeta.catalog import Props from lgdo import lh5 @@ -45,5 +46,5 @@ log.debug("trained model") # Save trained model with pickle -with open(args.output_file, "wb") as svm_file: +with Path(args.output_file).open("wb") as svm_file: pkl.dump(svm, svm_file, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index f643e03..607613c 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -1,9 +1,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -52,7 +51,7 @@ db_dict = Props.read_from(args.database) if dplms_dict["run_dplms"] is True: - with open(args.fft_raw_filelist) as f: + with Path(args.fft_raw_filelist).open() as f: fft_files = sorted(f.read().splitlines()) t0 = time.time() @@ -91,7 +90,7 @@ display=1, ) if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: inplot_dict = pkl.load(r) inplot_dict.update({"dplms": plot_dict}) @@ -115,14 +114,14 @@ out_dict = {} dplms_pars = Table(col_dict={"coefficients": Array([])}) if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: inplot_dict = pkl.load(r) else: inplot_dict = {} db_dict.update(out_dict) -pathlib.Path(os.path.dirname(args.lh5_path)).mkdir(parents=True, exist_ok=True) +Path(args.lh5_path).parent.mkdir(parents=True, exist_ok=True) sto.write( Table(col_dict={"dplms": dplms_pars}), name=args.channel, @@ -130,10 +129,10 @@ wo_mode="overwrite", ) -pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.dsp_pars, db_dict) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 0edf617..bcda090 100644 --- 
a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -1,10 +1,9 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time import warnings +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -351,19 +350,19 @@ else: db_dict.update({"ctc_params": out_alpha_dict}) - pathlib.Path(os.path.dirname(args.qbb_grid_path)).mkdir(parents=True, exist_ok=True) - with open(args.qbb_grid_path, "wb") as f: + Path(args.qbb_grid_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.qbb_grid_path).open("wb") as f: pkl.dump(optimisers, f) else: - pathlib.Path(args.qbb_grid_path).touch() + Path(args.qbb_grid_path).touch() -pathlib.Path(os.path.dirname(args.final_dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.final_dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.final_dsp_pars, db_dict) if args.plot_path: if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: plot_dict = pkl.load(r) else: plot_dict = {} @@ -383,6 +382,6 @@ "acq_space": bopt_zac.plot_acq(init_samples=sample_x), } - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as w: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as w: pkl.dump(plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index ea2bb34..2e6505b 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -1,11 +1,10 @@ import argparse import json import logging -import os -import pathlib import time import warnings from bisect import bisect_left +from pathlib import Path import lgdo import lgdo.lh5 as lh5 @@ -121,14 +120,14 @@ def get_out_data( peak_dict = Props.read_from(peak_json) db_dict = Props.read_from(args.decay_const) - pathlib.Path(os.path.dirname(args.peak_file)).mkdir(parents=True, exist_ok=True) + Path(args.peak_file).parent.mkdir(parents=True, exist_ok=True) if peak_dict.pop("run_selection") is True: log.debug("Starting peak selection") rng = np.random.default_rng() rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.peak_file}.{rand_num}" - with open(args.raw_filelist) as f: + with Path(args.raw_filelist).open() as f: files = f.read().splitlines() raw_files = sorted(files) @@ -138,7 +137,7 @@ def get_out_data( elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -225,7 +224,7 @@ def get_out_data( } for file in raw_files: - log.debug(os.path.basename(file)) + log.debug(Path(file).name) for peak, peak_dict in pk_dicts.items(): if peak_dict["idxs"] is not None: # idx is a long continuous array @@ -358,7 +357,7 @@ def get_out_data( log.debug(f"{peak} has reached the required number of events") else: - pathlib.Path(temp_output).touch() + Path(temp_output).touch() log.debug(f"event selection completed in {time.time()-t0} seconds") - os.rename(temp_output, args.peak_file) + Path(temp_output).rename(args.peak_file) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 67ffd5f..47261d2 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -1,9 +1,8 @@ import argparse import logging -import os -import pathlib import pickle as pkl import time +from pathlib import Path 
import lgdo.lh5 as lh5 import numpy as np @@ -57,7 +56,7 @@ db_dict = Props.read_from(args.database) if opt_dict.pop("run_nopt") is True: - with open(args.raw_filelist) as f: + with Path(args.raw_filelist).open() as f: files = f.read().splitlines() raw_files = sorted(files) @@ -96,15 +95,15 @@ plot_dict = {} if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: old_plot_dict = pkl.load(r) plot_dict = dict(noise_optimisation=plot_dict, **old_plot_dict) else: plot_dict = {"noise_optimisation": plot_dict} - with open(args.plot_path, "wb") as f: + with Path(args.plot_path).open("wb") as f: pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) -pathlib.Path(os.path.dirname(args.dsp_pars)).mkdir(parents=True, exist_ok=True) +Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.dsp_pars, dict(nopt_pars=out_dict, **db_dict)) diff --git a/scripts/pars_dsp_svm.py b/scripts/pars_dsp_svm.py index 28b335e..370e320 100644 --- a/scripts/pars_dsp_svm.py +++ b/scripts/pars_dsp_svm.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path from legendmeta.catalog import Props @@ -14,7 +13,7 @@ if args.log is not None: - pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) + Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") else: logging.basicConfig(level=logging.DEBUG) @@ -27,9 +26,9 @@ par_data = Props.read_from(args.input_file) -file = f"'$_/{os.path.basename(args.svm_file)}'" +file = f"'$_/{Path(args.svm_file).name}'" par_data["svm"] = {"model_file": file} -pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output_file, par_data) diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index c4750c6..82cec2d 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -1,8 +1,7 @@ import argparse import logging -import os -import pathlib import pickle as pkl +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -52,7 +51,7 @@ kwarg_dict.pop("run_tau") if isinstance(args.raw_files, list) and args.raw_files[0].split(".")[-1] == "filelist": input_file = args.raw_files[0] - with open(input_file) as f: + with Path(input_file).open() as f: input_file = f.read().splitlines() else: input_file = args.raw_files @@ -63,7 +62,7 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -113,17 +112,17 @@ tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) plot_dict = tau.plot_waveforms_after_correction( tb_data, "wf_pz", norm_param=kwarg_dict.get("norm_param", "pz_mean") ) plot_dict.update(tau.plot_slopes(slopes[idxs])) - with open(args.plot_path, "wb") as f: + with Path(args.plot_path).open("wb") as f: pkl.dump({"tau": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) else: out_dict = {} 
-pathlib.Path(os.path.dirname(args.output_file)).mkdir(parents=True, exist_ok=True) +Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output_file, tau.output_dict) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index be40ed5..a393868 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -2,10 +2,9 @@ import argparse import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path from typing import Callable import numpy as np @@ -142,7 +141,7 @@ def aoe_calibration( cal_dict = ecal_dict["pars"] eres_dict = ecal_dict["results"]["ecal"] -with open(args.eres_file, "rb") as o: +with Path(args.eres_file).open("rb") as o: object_dict = pkl.load(o) if kwarg_dict["run_aoe"] is True: @@ -158,7 +157,7 @@ def aoe_calibration( for field, item in kwarg_dict["plot_options"].items(): kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) @@ -210,7 +209,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -246,7 +245,7 @@ def eres_func(x): if args.plot_file: common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: out_plot_dict = pkl.load(r) out_plot_dict.update({"aoe": plot_dict}) else: @@ -257,11 +256,11 @@ def eres_func(x): elif common_dict is not None: out_plot_dict["common"] = common_dict - pathlib.Path(os.path.dirname(args.plot_file)).mkdir(parents=True, exist_ok=True) - with open(args.plot_file, "wb") as w: + Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_file).open("wb") as w: pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) -pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) +Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) results_dict = dict(**ecal_dict["results"], aoe=out_dict) final_hit_dict = { "pars": {"operations": cal_dict}, @@ -269,10 +268,10 @@ def eres_func(x): } Props.write_to(args.hit_pars, final_hit_dict) -pathlib.Path(os.path.dirname(args.aoe_results)).mkdir(parents=True, exist_ok=True) +Path(args.aoe_results).parent.mkdir(parents=True, exist_ok=True) final_object_dict = dict( **object_dict, aoe=obj, ) -with open(args.aoe_results, "wb") as w: +with Path(args.aoe_results).open("wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index f7b8be3..b310500 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import warnings from datetime import datetime +from pathlib import Path import lgdo.lh5 as lh5 import matplotlib as mpl @@ -462,9 +461,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): db_files = [ par_file for par_file in args.ctc_dict - if os.path.splitext(par_file)[1] == ".json" - or os.path.splitext(par_file)[1] == ".yml" - or os.path.splitext(par_file)[1] == ".yaml" + if Path(par_file).suffix in (".json", ".yml", ".yaml") ] database_dic = Props.read_from(db_files) @@ -493,7 
+490,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): bl_plots[field]["function"] = eval(item["function"]) common_plots = kwarg_dict.pop("common_plots") - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) @@ -514,7 +511,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -725,7 +722,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): common_dict.update({key: param_dict}) if args.inplot_dict: - with open(args.inplot_dict, "rb") as f: + with Path(args.inplot_dict).open("rb") as f: total_plot_dict = pkl.load(f) else: total_plot_dict = {} @@ -737,8 +734,8 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): total_plot_dict.update({"ecal": plot_dict}) - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump(total_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) # save output dictionary @@ -746,6 +743,6 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): Props.write_to(args.save_path, output_dict) # save calibration objects - with open(args.results_path, "wb") as fp: - pathlib.Path(os.path.dirname(args.results_path)).mkdir(parents=True, exist_ok=True) + with Path(args.results_path).open("wb") as fp: + Path(args.results_path).parent.mkdir(parents=True, exist_ok=True) pkl.dump({"ecal": full_object_dict}, fp, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index da83623..579b34a 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -2,10 +2,9 @@ import argparse import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -160,7 +159,7 @@ def lq_calibration( cal_dict = ecal_dict["pars"]["operations"] eres_dict = ecal_dict["results"]["ecal"] -with open(args.eres_file, "rb") as o: +with Path(args.eres_file).open("rb") as o: object_dict = pkl.load(o) if kwarg_dict["run_lq"] is True: @@ -172,7 +171,7 @@ def lq_calibration( for field, item in kwarg_dict["plot_options"].items(): kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - with open(args.files[0]) as f: + with Path(args.files[0]).open() as f: files = f.read().splitlines() files = sorted(files) @@ -213,7 +212,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -247,7 +246,7 @@ def eres_func(x): if args.plot_file: common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None if args.inplots: - with open(args.inplots, "rb") as r: + with Path(args.inplots).open("rb") as r: out_plot_dict = pkl.load(r) out_plot_dict.update({"lq": plot_dict}) else: @@ -258,24 +257,24 @@ def eres_func(x): elif common_dict is not None: out_plot_dict["common"] = common_dict - pathlib.Path(os.path.dirname(args.plot_file)).mkdir(parents=True, 
exist_ok=True) - with open(args.plot_file, "wb") as w: + Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_file).open("wb") as w: pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) results_dict = dict(**eres_dict, lq=out_dict) -pathlib.Path(os.path.dirname(args.hit_pars)).mkdir(parents=True, exist_ok=True) +Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) final_hit_dict = { "pars": {"operations": cal_dict}, "results": results_dict, } Props.write_to(args.hit_pars, final_hit_dict) -pathlib.Path(os.path.dirname(args.lq_results)).mkdir(parents=True, exist_ok=True) +Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) final_object_dict = dict( **object_dict, lq=obj, ) Props.write_to(args.lq_results, final_object_dict) -with open(args.lq_results, "wb") as w: +with Path(args.lq_results).open("wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 9640087..5311c46 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -3,11 +3,10 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np from legendmeta import LegendMetadata @@ -160,7 +159,7 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -226,10 +225,10 @@ hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal} plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal} - pathlib.Path(os.path.dirname(args.save_path)).mkdir(parents=True, exist_ok=True) + Path(args.save_path).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.save_path, hit_dict) if args.plot_path: - pathlib.Path(os.path.dirname(args.plot_path)).mkdir(parents=True, exist_ok=True) - with open(args.plot_path, "wb") as f: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index 8fb2b36..e9573e3 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path from typing import Callable import numpy as np @@ -32,7 +31,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -289,33 +288,33 @@ def eres_func(x): for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if 
args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run if isinstance(args.input_files, list): files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() else: - with open(args.input_files) as f: + with Path(args.input_files).open() as f: files = f.read().splitlines() files = sorted( @@ -325,7 +324,7 @@ def eres_func(x): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -369,7 +368,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -403,21 +402,21 @@ def eres_func(x): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": cal_dict[fk.timestamp], "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) + Path(out).parent.mkdir(parents=True, exist_ok=True) Props.write_to(out, final_hit_dict) for out in args.aoe_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 6ab1a4b..4064b3c 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -3,10 +3,9 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -32,7 +31,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -83,29 +82,29 @@ def run_splitter(files): for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with 
open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() files = sorted( @@ -115,7 +114,7 @@ def run_splitter(files): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -179,7 +178,7 @@ def run_splitter(files): if args.pulser_files: mask = np.array([], dtype=bool) for file in args.pulser_files: - with open(file) as f: + with Path(file).open() as f: pulser_dict = json.load(f) pulser_mask = np.array(pulser_dict["mask"]) mask = np.append(mask, pulser_mask) @@ -188,7 +187,7 @@ def run_splitter(files): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -249,22 +248,22 @@ def run_splitter(files): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": {"operations": cal_dict[fk.timestamp]}, "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "w") as w: + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("w") as w: json.dump(final_hit_dict, w, indent=4) for out in args.fit_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 890554f..2ba88af 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -4,10 +4,9 @@ import copy import json import logging -import os -import pathlib import pickle as pkl import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -32,7 +31,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = 
ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -285,33 +284,33 @@ def eres_func(x): for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run if isinstance(args.input_files, list): files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() else: - with open(args.input_files) as f: + with Path(args.input_files).open() as f: files = f.read().splitlines() files = sorted( @@ -321,7 +320,7 @@ def eres_func(x): final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -348,7 +347,7 @@ def eres_func(x): if args.pulser_files: mask = np.array([], dtype=bool) for file in args.pulser_files: - with open(file) as f: + with Path(file).open() as f: pulser_dict = json.load(f) pulser_mask = np.array(pulser_dict["mask"]) mask = np.append(mask, pulser_mask) @@ -357,7 +356,7 @@ def eres_func(x): elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -391,22 +390,22 @@ def eres_func(x): if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": {"operations": cal_dict[fk.timestamp]}, "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "w") as w: + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("w") as w: json.dump(final_hit_dict, w, indent=4) for out in args.lq_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, 
exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index b6f12d7..a6eab18 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -3,11 +3,10 @@ import argparse import copy import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np import pandas as pd @@ -34,7 +33,7 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.period}-{fk.run}") run_files.append([]) @@ -447,29 +446,29 @@ def calibrate_partition( for ecal in args.ecal_file: cal = Props.read_from(ecal) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) cal_dict[fk.timestamp] = cal["pars"] results_dicts[fk.timestamp] = cal["results"] object_dict = {} for ecal in args.eres_file: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) object_dict[fk.timestamp] = cal inplots_dict = {} if args.inplots: for ecal in args.inplots: - with open(ecal, "rb") as o: + with Path(ecal).open("rb") as o: cal = pkl.load(o) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(ecal)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(ecal).name) inplots_dict[fk.timestamp] = cal # sort files in dictionary where keys are first timestamp from run files = [] for file in args.input_files: - with open(file) as f: + with Path(file).open() as f: files += f.read().splitlines() files = sorted( @@ -479,7 +478,7 @@ def calibrate_partition( final_dict = {} all_file = run_splitter(sorted(files)) for filelist in all_file: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(sorted(filelist)[0])) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(sorted(filelist)[0]).name) timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) @@ -518,7 +517,7 @@ def calibrate_partition( elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( @@ -553,21 +552,21 @@ def calibrate_partition( if args.plot_file: for plot_file in args.plot_file: - pathlib.Path(os.path.dirname(plot_file)).mkdir(parents=True, exist_ok=True) - with open(plot_file, "wb") as w: + Path(plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(plot_file).open("wb") as w: pkl.dump(plot_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) for out in sorted(args.hit_pars): - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) final_hit_dict = { "pars": cal_dict[fk.timestamp], "results": results_dicts[fk.timestamp], } - pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) + Path(out).parent.mkdir(parents=True, exist_ok=True) Props.write_to(out, final_hit_dict) for out in args.fit_results: - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(out)) - 
pathlib.Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True) - with open(out, "wb") as w: + fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name) + Path(out).parent.mkdir(parents=True, exist_ok=True) + with Path(out).open("wb") as w: pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index f62da8b..790ee0a 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -3,11 +3,10 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import numpy as np from legendmeta import LegendMetadata @@ -72,10 +71,10 @@ if isinstance(args.cal_files, list): cal_files = [] for file in args.cal_files: - with open(file) as f: + with Path(file).open() as f: cal_files += f.read().splitlines() else: - with open(args.cal_files) as f: + with Path(args.cal_files).open() as f: cal_files = f.read().splitlines() cal_files = sorted( @@ -99,10 +98,10 @@ if isinstance(args.fft_files, list): fft_files = [] for file in args.fft_files: - with open(file) as f: + with Path(file).open() as f: fft_files += f.read().splitlines() else: - with open(args.fft_files) as f: + with Path(args.fft_files).open() as f: fft_files = f.read().splitlines() fft_files = sorted( @@ -223,7 +222,7 @@ elif args.tcm_filelist: # get pulser mask from tcm files - with open(args.tcm_filelist) as f: + with Path(args.tcm_filelist).open() as f: tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, total_mask = get_tcm_pulser_ids( @@ -305,11 +304,11 @@ plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal} for file in args.save_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + Path(file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(file, hit_dict) if args.plot_path: for file in args.plot_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) - with open(file, "wb") as f: + Path(file).parent.mkdir(parents=True, exist_ok=True) + with Path(file).open("wb") as f: pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 10af322..48f3d9f 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -3,11 +3,10 @@ import argparse import json import logging -import os -import pathlib import pickle as pkl import re import warnings +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -64,7 +63,7 @@ if isinstance(args.phy_files, list): phy_files = [] for file in sorted(args.phy_files): - with open(file) as f: + with Path(file).open() as f: run_files = f.read().splitlines() if len(run_files) == 0: continue @@ -78,7 +77,7 @@ ) bl_mask = np.append(bl_mask, bl_idxs) else: - with open(args.phy_files) as f: + with Path(args.phy_files).open() as f: phy_files = f.read().splitlines() phy_files = sorted(np.unique(phy_files)) bls = sto.read("ch1027200/dsp/", phy_files, field_mask=["wf_max", "bl_mean"])[0] @@ -147,11 +146,11 @@ log.debug(f"cut_dict is: {json.dumps(hit_dict, indent=2)}") for file in args.save_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) + Path(file).name.mkdir(parents=True, exist_ok=True) Props.write_to(file, {"pars": {"operations": hit_dict}}) if args.plot_path: for file in args.plot_path: - pathlib.Path(os.path.dirname(file)).mkdir(parents=True, exist_ok=True) - with open(file, "wb") as f: + 
Path(file).parent.mkdir(parents=True, exist_ok=True) + with Path(file).open("wb") as f: pkl.dump({"qc": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index f72a04a..27c1101 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -1,7 +1,6 @@ import argparse import logging -import os -import pathlib +from pathlib import Path import lgdo.lh5 as lh5 import numpy as np @@ -41,7 +40,7 @@ if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": tcm_files = args.tcm_files[0] - with open(tcm_files) as f: + with Path(tcm_files).open() as f: tcm_files = f.read().splitlines() else: tcm_files = args.tcm_files @@ -51,5 +50,5 @@ tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") ) -pathlib.Path(os.path.dirname(args.pulser_file)).mkdir(parents=True, exist_ok=True) +Path(args.pulser_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.pulser_file, {"idxs": ids.tolist(), "mask": mask.tolist()}) diff --git a/scripts/util/FileKey.py b/scripts/util/FileKey.py index 5c01f97..9f646cc 100644 --- a/scripts/util/FileKey.py +++ b/scripts/util/FileKey.py @@ -2,9 +2,9 @@ This module contains classes to convert between keys and files using the patterns defined in patterns.py """ -import os import re from collections import namedtuple +from pathlib import Path import snakemake as smk @@ -216,7 +216,7 @@ def per_grouper(files): pers = [] per_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.experiment}-{fk.period}" not in pers: pers.append(f"{fk.experiment}-{fk.period}") per_files.append([]) @@ -231,7 +231,7 @@ def run_grouper(files): runs = [] run_files = [] for file in files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(file).name) if f"{fk.experiment}-{fk.period}-{fk.run}" not in runs: runs.append(f"{fk.experiment}-{fk.period}-{fk.run}") run_files.append([]) diff --git a/scripts/util/cal_grouping.py b/scripts/util/cal_grouping.py index aec1572..651c137 100644 --- a/scripts/util/cal_grouping.py +++ b/scripts/util/cal_grouping.py @@ -3,7 +3,7 @@ """ import json -import os +from pathlib import Path from .FileKey import ChannelProcKey, ProcessingFileKey from .patterns import ( @@ -16,7 +16,7 @@ class cal_grouping: def __init__(self, setup, input_file): - with open(input_file) as r: + with Path(input_file).open() as r: self.datasets = json.load(r) self.expand_runs() self.setup = setup @@ -43,18 +43,13 @@ def get_filelists(self, dataset, channel, tier, experiment="l200", datatype="cal for per in dataset: if dataset[per] == "all": files += [ - os.path.join( - filelist_path(self.setup), - f"all-{experiment}-{per}-*-{datatype}-{tier}.filelist", - ) + Path(filelist_path(self.setup)) + / f"all-{experiment}-{per}-*-{datatype}-{tier}.filelist" ] else: files += [ - os.path.join( - filelist_path(self.setup), - f"all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist", - ) - for run in dataset[per] + Path(filelist_path(self.setup)) + / "all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist" ] return files @@ -80,7 +75,7 @@ def get_par_files( channel = "{channel}" selected_par_files = [] for par_file in all_par_files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(par_file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) if 
( fk.datatype == datatype and fk.experiment == experiment @@ -128,7 +123,7 @@ def get_plt_files( channel = "{channel}" selected_par_files = [] for par_file in all_par_files: - fk = ProcessingFileKey.get_filekey_from_pattern(os.path.basename(par_file)) + fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) if ( fk.datatype == datatype and fk.experiment == experiment @@ -170,7 +165,7 @@ def get_log_file( datatype=datatype, name=name, ) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(par_files[0])) + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) if channel == "default": fk.channel = "{channel}" else: @@ -187,7 +182,7 @@ def get_timestamp(self, catalog, dataset, channel, tier, experiment="l200", data datatype=datatype, name=None, ) - fk = ChannelProcKey.get_filekey_from_pattern(os.path.basename(par_files[0])) + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) return fk.timestamp def get_wildcard_constraints(self, dataset, channel): diff --git a/scripts/util/catalog.py b/scripts/util/catalog.py index 1fb516b..390a7c1 100644 --- a/scripts/util/catalog.py +++ b/scripts/util/catalog.py @@ -43,7 +43,7 @@ def read_impl(sources): with file_name.open() as file: return yaml.safe_load(file) elif file_name.suffix == ".json": - with open(file_name) as file: + with file_name.open() as file: return json.load(file) else: msg = f"Can't run Props.read_from on file with suffix {file_name.suffix}" diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index 2fc3525..f347975 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -2,10 +2,10 @@ This module creates the validity files used for determining the time validity of data """ -import glob import json import re import warnings +from pathlib import Path import snakemake as smk import yaml @@ -40,13 +40,13 @@ def from_filekey(cls, filekey, name_dict): @staticmethod def write_to_jsonl(file_names, path): - with open(path, "w") as of: + with Path(path).open("w") as of: for file_name in file_names: of.write(f"{file_name.get_json()}\n") @staticmethod def write_to_yaml(file_names, path): - with open(path, "w") as of: + with Path(path).open("w") as of: yaml.dump([file_name.__dict__ for file_name in file_names], of, sort_keys=False) @staticmethod @@ -104,7 +104,7 @@ def get_keys(keypart, search_pattern): except AttributeError: tier_pattern_rx = re.compile(smk.io.regex(search_pattern)) fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] - files = glob.glob(fn_glob_pattern) + files = Path(fn_glob_pattern).glob() keys = [] for f in files: m = tier_pattern_rx.match(f) diff --git a/scripts/util/pars_loading.py b/scripts/util/pars_loading.py index 7a9dd87..a21f6ae 100644 --- a/scripts/util/pars_loading.py +++ b/scripts/util/pars_loading.py @@ -3,7 +3,7 @@ to determine the par and par overwrite for a particular timestamp """ -import os +from pathlib import Path from .catalog import Catalog from .FileKey import ProcessingFileKey @@ -29,19 +29,18 @@ def match_pars_files(filelist1, filelist2): @staticmethod def get_par_file(setup, timestamp, tier): - par_file = os.path.join(get_pars_path(setup, tier), "validity.yaml") + par_file = Path(get_pars_path(setup, tier)) / "validity.yaml" pars_files = pars_catalog.get_calib_files(par_file, timestamp) - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.yaml") + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" 
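# Illustrative sketch, not part of the patch: the calibration scripts and
# cal_grouping above repeatedly reduce a file name to its filekey and then use
# fk.timestamp as a dictionary key. A simplified stand-in for
# ChannelProcKey.get_filekey_from_pattern, assuming the usual
# {experiment}-{period}-{run}-{datatype}-{timestamp}-... naming; the regex and
# helper name below are hypothetical, not the project's own pattern.
import re
from pathlib import Path

_KEY_RX = re.compile(
    r"(?P<experiment>[^-]+)-(?P<period>p\d+)-(?P<run>r\d+)"
    r"-(?P<datatype>[^-]+)-(?P<timestamp>\d{8}T\d{6}Z)"
)

def timestamp_of(path):
    """Pull the timestamp out of a LEGEND-style file name."""
    m = _KEY_RX.match(Path(path).name)
    if m is None:
        raise ValueError(f"unrecognised file name: {path}")
    return m.group("timestamp")

# usage, mirroring the loops above:
#   object_dict[timestamp_of(ecal)] = pkl.load(...)
assert timestamp_of("l200-p03-r000-cal-20230101T123456Z-ch1027200-par_hit.pkl") == "20230101T123456Z"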
pars_files_overwrite = pars_catalog.get_calib_files(par_overwrite_file, timestamp) if len(pars_files_overwrite) > 0: pars_files, pars_files_overwrite = pars_catalog.match_pars_files( pars_files, pars_files_overwrite ) - pars_files = [os.path.join(get_pars_path(setup, tier), file) for file in pars_files] + pars_files = [Path(get_pars_path(setup, tier)) / file for file in pars_files] if len(pars_files_overwrite) > 0: pars_overwrite_files = [ - os.path.join(par_overwrite_path(setup), tier, file) - for file in pars_files_overwrite + Path(par_overwrite_path(setup)) / tier / file for file in pars_files_overwrite ] pars_files += pars_overwrite_files return pars_files diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 7f0b30c..cae1cd0 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -2,7 +2,7 @@ This module contains all the patterns needed for the data production """ -import os +from pathlib import Path from .utils import ( get_pars_path, @@ -56,61 +56,63 @@ def full_channel_pattern_with_extension(): def get_pattern_unsorted_data(setup): if sandbox_path(setup) is not None: - return os.path.join( - f"{sandbox_path(setup)}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca", + return ( + Path(f"{sandbox_path(setup)}") + / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" ) else: return None def get_pattern_tier_daq(setup): - return os.path.join( - f"{tier_daq_path(setup)}", - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca", + return ( + Path(f"{tier_daq_path(setup)}") + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" ) def get_pattern_tier_raw_blind(setup): - return os.path.join( - f"{tier_raw_blind_path(setup)}", - "phy", - "{period}", - "{run}", - "{experiment}-{period}-{run}-phy-{timestamp}-tier_raw.lh5", + return ( + Path(f"{tier_raw_blind_path(setup)}") + / "phy" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-phy-{timestamp}-tier_raw.lh5" ) def get_pattern_tier(setup, tier, check_in_cycle=True): if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: - file_pattern = os.path.join( - get_tier_path(setup, tier), - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5", + file_pattern = ( + Path(get_tier_path(setup, tier)) + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + + f"{tier}.lh5" ) elif tier in ["evt_concat", "pet_concat"]: - file_pattern = os.path.join( - get_tier_path(setup, tier[:3]), - "{datatype}", - "{experiment}-{period}-{run}-{datatype}-tier_" + f"{tier[:3]}.lh5", + file_pattern = ( + Path(get_tier_path(setup, tier[:3])) + / "{datatype}" + / "{experiment}-{period}-{run}-{datatype}-tier_" + + f"{tier[:3]}.lh5" ) elif tier == "skm": - file_pattern = os.path.join( - f"{tier_skm_path(setup)}", - "phy", - "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5", + file_pattern = ( + Path(f"{tier_skm_path(setup)}") + / "phy" + / "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5" ) else: msg = "invalid tier" raise Exception(msg) - if tier_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: - return "/tmp/" + os.path.basename(file_pattern) + if tier_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: + return "/tmp/" + Path(file_pattern).name else: return file_pattern @@ -118,25 +120,27 @@ def get_pattern_tier(setup, 
tier, check_in_cycle=True): def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=True): if tier in ["raw", "tcm", "dsp", "hit", "evt", "psp", "pht", "pet"]: if name is not None: - return os.path.join( - get_pars_path(setup, tier), - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}_{name}.{extension}", + return ( + Path(get_pars_path(setup, tier)) + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}_{name}.{extension}" ) else: - file_pattern = os.path.join( - get_pars_path(setup, tier), - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}", + file_pattern = ( + Path(get_pars_path(setup, tier)) + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}.{extension}" ) else: msg = "invalid tier" raise Exception(msg) - if pars_path(setup) not in os.path.normpath(file_pattern) and check_in_cycle is True: + if pars_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: if name is None: return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{extension}" else: @@ -150,46 +154,48 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): if name is not None: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{ext}", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-" + + f"par_{tier}_{name}.{ext}" ) else: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{ext}", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-" + + f"par_{tier}.{ext}" ) def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): if name is not None: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - + f"{tier}_{name}-overwrite.{extension}", + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + f"{tier}_{name}-overwrite.{extension}" ) else: - return os.path.join( - f"{par_overwrite_path(setup)}", - tier, - "{datatype}", - "{period}", - "{run}", - "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + return ( + Path(f"{par_overwrite_path(setup)}") + / tier + / "{datatype}" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + tier - + f"-overwrite.{extension}", + + f"-overwrite.{extension}" ) @@ -197,90 +203,104 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" if datatype is None: datatype = "{datatype}" if name is None: - return os.path.join( - f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-" + return ( + Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" - + f"{tier}.{extension}", + + f"{tier}.{extension}" ) else: - return os.path.join( - f"{tmp_par_path(setup)}", - 
"{experiment}-{period}-{run}-" + return ( + Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" + datatype + "-{timestamp}" - + f"par_{tier}_{name}.{extension}", + + f"par_{tier}_{name}.{extension}" ) def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: - return os.path.join( - f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}.{extension}", + return ( + Path(f"{tmp_par_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + + f"{tier}.{extension}" ) else: - return os.path.join( - f"{tmp_par_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" - + f"{tier}_{name}.{extension}", + return ( + Path(f"{tmp_par_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + + f"{tier}_{name}.{extension}" ) def get_pattern_plts_tmp_channel(setup, tier, name=None): if name is None: - return os.path.join( - f"{tmp_plts_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + tier + ".pkl", + return ( + Path(f"{tmp_plts_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + + tier + + ".pkl" ) else: - return os.path.join( - f"{tmp_plts_path(setup)}", - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl", + return ( + Path(f"{tmp_plts_path(setup)}") + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + + f"{tier}_{name}.pkl" ) def get_pattern_plts(setup, tier, name=None): if name is None: - return os.path.join( - f"{plts_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + ".dir", + return ( + Path(f"{plts_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + + tier + + ".dir" ) else: - return os.path.join( - f"{plts_path(setup)}", - tier, - "cal", - "{period}", - "{run}", - "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + "_" + name + ".dir", + return ( + Path(f"{plts_path(setup)}") + / tier + / "cal" + / "{period}" + / "{run}" + / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + + tier + + "_" + + name + + ".dir" ) def get_pattern_log(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log", + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / "{experiment}-{period}-{run}-{datatype}-{timestamp}-" + + processing_step + + ".log" ) def get_pattern_log_channel(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log", + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + + processing_step + + ".log" ) def get_pattern_log_concat(setup, processing_step): - return os.path.join( - f"{tmp_log_path(setup)}", - processing_step, - "{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log", + return ( + Path(f"{tmp_log_path(setup)}") + / processing_step + / "{experiment}-{period}-{run}-{datatype}-" + + processing_step + + ".log" ) diff --git a/scripts/util/utils.py b/scripts/util/utils.py index 2cb53ef..fd433c7 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -189,7 +189,7 @@ def subst_vars_in_snakemake_config(workflow, config): config_filename = 
workflow.overwrite_configfiles[0] # ToDo: Better way of handling this? subst_vars( config, - var_values={"_": os.path.dirname(config_filename)}, + var_values={"_": Path(config_filename).parent}, use_env=True, ignore_missing=False, ) @@ -203,8 +203,8 @@ def run_splitter(files): runs = [] run_files = [] for file in files: - base = os.path.basename(file) - file_name = os.path.splitext(base)[0] + base = Path(file).name + file_name = Path(base).name parts = file_name.split("-") run_no = parts[3] if run_no not in runs: diff --git a/tests/test_util.py b/tests/test_util.py index 707843b..010c749 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,5 +1,4 @@ import json -import os from pathlib import Path from scripts.util import ( @@ -20,7 +19,7 @@ testprod = Path(__file__).parent / "dummy_cycle" -with open(str(testprod / "config.json")) as r: +with testprod.open() as r: setup = json.load(r) subst_vars(setup, var_values={"_": str(testprod)}) setup = setup["setups"]["test"] @@ -107,12 +106,12 @@ def test_create_pars_keylist(): def test_pars_loading(): pars_files = CalibCatalog.get_calib_files( - os.path.join(par_dsp_path(setup), "validity.jsonl"), "20230101T123456Z" + Path(par_dsp_path(setup)) / "validity.jsonl", "20230101T123456Z" ) assert pars_files == ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"] par_override_files = CalibCatalog.get_calib_files( - os.path.join(par_overwrite_path(setup), "dsp", "validity.jsonl"), "20230101T123456Z" + Path(par_overwrite_path(setup)) / "dsp" / "validity.jsonl", "20230101T123456Z" ) pars_files, pars_files_overwrite = pars_catalog.match_pars_files( @@ -122,12 +121,12 @@ def test_pars_loading(): assert pars_files == ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"] assert set(pars_catalog.get_par_file(setup, "20230101T123456Z", "dsp")) == { - os.path.join( - par_dsp_path(setup), - "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", + ( + Path(par_dsp_path(setup)) + / "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", ), - os.path.join( - par_overwrite_path(setup), - "dsp/cal/p00/r000/l200-p00-r000-cal-T%-par_dsp_energy-overwrite.json", + ( + Path(par_overwrite_path(setup)) + / "dsp/cal/p00/r000/l200-p00-r000-cal-T%-par_dsp_energy-overwrite.json", ), } From 323dd0966c02bd9486c91bebde472ed965b13517 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 28 Nov 2024 19:04:37 +0100 Subject: [PATCH 010/101] debugging --- Snakefile | 92 +++++++++--------------- rules/blinding_calibration.smk | 10 +-- rules/blinding_check.smk | 10 +-- rules/chanlist_gen.smk | 8 +-- rules/common.smk | 50 +++++++------ rules/dsp.smk | 33 +++++---- rules/evt.smk | 11 +-- rules/filelist_gen.smk | 34 ++++++--- rules/hit.smk | 24 ++++--- rules/pht.smk | 35 +++++---- rules/pht_fast.smk | 6 +- rules/psp.smk | 41 +++++++---- rules/qc_phy.smk | 11 ++- rules/raw.smk | 1 - scripts/create_chankeylist.py | 7 +- scripts/util/FileKey.py | 8 +++ scripts/util/__init__.py | 16 ++--- scripts/util/cal_grouping.py | 38 +++++++--- scripts/util/catalog.py | 2 +- scripts/util/create_pars_keylist.py | 31 ++++---- scripts/util/pars_loading.py | 8 +-- scripts/util/patterns.py | 106 +++++++++++----------------- scripts/util/utils.py | 4 ++ 23 files changed, 311 insertions(+), 275 deletions(-) diff --git a/Snakefile b/Snakefile index b2daaa2..39a3dee 100644 --- a/Snakefile +++ b/Snakefile @@ -10,7 +10,7 @@ This includes: - the same for partition level tiers """ -import pathlib +from pathlib import Path import os import json import sys @@ -20,8 +20,8 @@ 
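# Illustrative sketch, not part of the patch: a quick reference for the pathlib
# idioms this migration leans on (plain Python behaviour; the file name below is
# made up). Note in particular that os.path.splitext(x)[0] corresponds to
# Path(x).stem, while Path(x).name keeps the extension.
import os
from pathlib import Path

f = Path("generated/par/hit/cal/p03/r000/l200-p03-r000-cal-20230101T123456Z-par_hit.yaml")

assert f.name == os.path.basename(f)          # basename, extension kept
assert f.stem.endswith("par_hit")             # os.path.splitext(basename)[0]
assert f.suffix == ".yaml"                    # used when picking a reader by extension
assert f.parent == Path(os.path.dirname(f))   # dirname
assert Path("a") / "b" / "c.yaml" == Path(os.path.join("a", "b", "c.yaml"))

# open() and mkdir() move onto the Path object itself:
#   pathlib.Path(os.path.dirname(out)).mkdir(...)  ->  Path(out).parent.mkdir(...)
#   with open(out, "w") as w:                      ->  with Path(out).open("w") as w: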
from collections import OrderedDict import logging import scripts.util as ds -from scripts.util.pars_loading import pars_catalog -from scripts.util.patterns import get_pattern_tier_raw +from scripts.util.pars_loading import ParsCatalog +from scripts.util.patterns import get_pattern_tier from scripts.util.utils import ( subst_vars_in_snakemake_config, runcmd, @@ -31,6 +31,7 @@ from scripts.util.utils import ( metadata_path, tmp_log_path, pars_path, + det_status_path, ) # Set with `snakemake --configfile=/path/to/your/config.json` @@ -43,8 +44,9 @@ setup = config["setups"]["l200"] configs = config_path(setup) chan_maps = chan_map_path(setup) meta = metadata_path(setup) +det_status = det_status_path(setup) swenv = runcmd(setup) -part = ds.cal_grouping(setup, os.path.join(configs, "partitions.json")) +part = ds.CalGrouping(setup, Path(det_status) / "cal_partitions.yaml") basedir = workflow.basedir @@ -72,32 +74,6 @@ include: "rules/blinding_calibration.smk" include: "rules/qc_phy.smk" -# Log parameter catalogs in validity.jsonl files -hit_par_cat_file = os.path.join(pars_path(setup), "hit", "validity.jsonl") -if os.path.isfile(hit_par_cat_file): - os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl")) -pathlib.Path(os.path.dirname(hit_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(hit_par_catalog, hit_par_cat_file) - -pht_par_cat_file = os.path.join(pars_path(setup), "pht", "validity.jsonl") -if os.path.isfile(pht_par_cat_file): - os.remove(os.path.join(pars_path(setup), "pht", "validity.jsonl")) -pathlib.Path(os.path.dirname(pht_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(pht_par_catalog, pht_par_cat_file) - -dsp_par_cat_file = os.path.join(pars_path(setup), "dsp", "validity.jsonl") -if os.path.isfile(dsp_par_cat_file): - os.remove(dsp_par_cat_file) -pathlib.Path(os.path.dirname(dsp_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(dsp_par_catalog, dsp_par_cat_file) - -psp_par_cat_file = os.path.join(pars_path(setup), "psp", "validity.jsonl") -if os.path.isfile(psp_par_cat_file): - os.remove(psp_par_cat_file) -pathlib.Path(os.path.dirname(psp_par_cat_file)).mkdir(parents=True, exist_ok=True) -ds.pars_key_resolve.write_to_jsonl(psp_par_catalog, psp_par_cat_file) - - localrules: gen_filelist, autogen_output, @@ -111,36 +87,36 @@ onstart: shell('{swenv} python3 -B -c "import ' + pkg + '"') # Log parameter catalogs in validity.jsonl files - hit_par_cat_file = os.path.join(pars_path(setup), "hit", "validity.jsonl") - if os.path.isfile(hit_par_cat_file): - os.remove(os.path.join(pars_path(setup), "hit", "validity.jsonl")) - pathlib.Path(os.path.dirname(hit_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(hit_par_catalog, hit_par_cat_file) - - pht_par_cat_file = os.path.join(pars_path(setup), "pht", "validity.jsonl") - if os.path.isfile(pht_par_cat_file): - os.remove(os.path.join(pars_path(setup), "pht", "validity.jsonl")) - pathlib.Path(os.path.dirname(pht_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(pht_par_catalog, pht_par_cat_file) - - dsp_par_cat_file = os.path.join(pars_path(setup), "dsp", "validity.jsonl") - if os.path.isfile(dsp_par_cat_file): - os.remove(dsp_par_cat_file) - pathlib.Path(os.path.dirname(dsp_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(dsp_par_catalog, dsp_par_cat_file) - - psp_par_cat_file = os.path.join(pars_path(setup), "psp", 
"validity.jsonl") - if os.path.isfile(psp_par_cat_file): - os.remove(psp_par_cat_file) - pathlib.Path(os.path.dirname(psp_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(psp_par_catalog, psp_par_cat_file) + hit_par_cat_file = Path(pars_path(setup)) / "hit" / "validity.yaml" + if hit_par_cat_file.is_file(): + hit_par_cat_file.unlink() + Path(hit_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(hit_par_catalog, hit_par_cat_file) + + pht_par_cat_file = Path(pars_path(setup)) / "pht" / "validity.yaml" + if pht_par_cat_file.is_file(): + pht_par_cat_file.unlink() + Path(pht_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(pht_par_catalog, pht_par_cat_file) + + dsp_par_cat_file = Path(pars_path(setup)) / "dsp" / "validity.yaml" + if dsp_par_cat_file.is_file(): + dsp_par_cat_file.unlink() + Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) + + psp_par_cat_file = Path(pars_path(setup)) / "psp" / "validity.yaml" + if psp_par_cat_file.is_file(): + psp_par_cat_file.unlink() + Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) + ds.ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) onsuccess: from snakemake.report import auto_report rep_dir = f"{log_path(setup)}/report-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}" - pathlib.Path(rep_dir).mkdir(parents=True, exist_ok=True) + Path(rep_dir).mkdir(parents=True, exist_ok=True) # auto_report(workflow.persistence.dag, f"{rep_dir}/report.html") with open(os.path.join(rep_dir, "dag.txt"), "w") as f: @@ -190,12 +166,12 @@ rule gen_filelist: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_tier_raw(setup), - ignore_keys_file=os.path.join(configs, "ignore_keys.keylist"), - analysis_runs_file=os.path.join(configs, "analysis_runs.json"), + get_pattern_tier(setup, "raw", check_in_cycle=False), + ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", + analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - os.path.join(filelist_path(setup), "{label}-{tier}.filelist"), + Path(filelist_path(setup)) / "{label}-{tier}.filelist", run: if len(input) == 0: print( diff --git a/rules/blinding_calibration.smk b/rules/blinding_calibration.smk index bcf0d64..85ee2f6 100644 --- a/rules/blinding_calibration.smk +++ b/rules/blinding_calibration.smk @@ -11,6 +11,7 @@ from scripts.util.patterns import ( get_pattern_plts_tmp_channel, get_pattern_log_channel, ) +from pathlib import Path rule build_blinding_calibration: @@ -19,9 +20,8 @@ rule build_blinding_calibration: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), + files=Path(filelist_path(setup)) + / "all-{experiment}-{period}-{run}-cal-raw.filelist", params: timestamp="{timestamp}", datatype="cal", @@ -57,7 +57,7 @@ rule build_plts_blinding: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, name="blindcal", ), @@ -79,7 +79,7 @@ rule build_pars_blinding: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, name="blindcal", ), diff --git a/rules/blinding_check.smk 
b/rules/blinding_check.smk index ac7240c..eb3407d 100644 --- a/rules/blinding_check.smk +++ b/rules/blinding_check.smk @@ -12,6 +12,7 @@ from scripts.util.patterns import ( get_pattern_plts, get_pattern_pars, ) +from pathlib import Path rule build_blinding_check: @@ -20,9 +21,8 @@ rule build_blinding_check: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), + files=Path(filelist_path(setup)) + / "all-{experiment}-{period}-{run}-cal-raw.filelist", par_file=get_blinding_curve_file, params: timestamp="{timestamp}", @@ -59,7 +59,7 @@ rule build_plts_raw: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, ), output: @@ -80,7 +80,7 @@ rule build_pars_raw: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts( diff --git a/rules/chanlist_gen.smk b/rules/chanlist_gen.smk index 1dc4957..820d0fa 100644 --- a/rules/chanlist_gen.smk +++ b/rules/chanlist_gen.smk @@ -13,7 +13,7 @@ from scripts.util.utils import filelist_path, runcmd def get_par_chanlist( - setup, keypart, tier, basedir, configs, chan_maps, name=None, extension="json" + setup, keypart, tier, basedir, det_status, chan_maps, name=None, extension="yaml" ): tier_pattern = "((?P[^_]+)(\\_(?P[^_]+)(\\_(?P[^_]+)?)?)?)?" keypart_rx = re.compile(tier_pattern) @@ -28,7 +28,7 @@ def get_par_chanlist( f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --configs {configs}" + cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" os.system(cmd) @@ -42,7 +42,7 @@ def get_par_chanlist( return filenames -def get_plt_chanlist(setup, keypart, tier, basedir, configs, chan_maps, name=None): +def get_plt_chanlist(setup, keypart, tier, basedir, det_status, chan_maps, name=None): key = ChannelProcKey.parse_keypart(keypart) output_file = os.path.join( @@ -50,7 +50,7 @@ def get_plt_chanlist(setup, keypart, tier, basedir, configs, chan_maps, name=Non f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --configs {configs}" + cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" os.system(cmd) diff --git a/rules/common.smk b/rules/common.smk index b985044..6ba4654 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -2,16 +2,17 @@ Helper functions for running data production """ -import pathlib, os +from pathlib import Path from scripts.util.patterns import ( par_overwrite_path, - par_raw_path, + get_pars_path, get_pattern_unsorted_data, get_pattern_tier_daq, get_pattern_tier, get_pattern_plts_tmp_channel, ) from scripts.util import ProcessingFileKey +from scripts.util.catalog import Catalog from scripts.util import utils @@ -21,8 +22,8 @@ def ro(path): def 
get_blinding_curve_file(wildcards): """func to get the blinding calibration curves from the overrides""" - par_files = pars_catalog.get_calib_files( - Path(par_overwrite_path(setup)) / "raw" / "validity.jsonl", + par_files = Catalog.get_files( + Path(par_overwrite_path(setup)) / "raw" / "validity.yaml", wildcards.timestamp, ) if isinstance(par_files, str): @@ -36,13 +37,13 @@ def get_blinding_curve_file(wildcards): def get_blinding_check_file(wildcards): """func to get the right blinding check file""" - par_files = pars_catalog.get_calib_files( - Path(par_raw_path(setup)) / "validity.jsonl", wildcards.timestamp + par_files = Catalog.get_files( + Path(get_pars_path(setup, "raw")) / "validity.yaml", wildcards.timestamp ) if isinstance(par_files, str): - return str(Path(par_raw_path(setup)) / par_files) + return Path(get_pars_path(setup, "raw")) / par_files else: - return [str(Path(par_raw_path(setup)) / par_file) for par_file in par_files] + return [Path(get_pars_path(setup, "raw")) / par_file for par_file in par_files] def set_last_rule_name(workflow, new_name): @@ -70,35 +71,38 @@ def set_last_rule_name(workflow, new_name): workflow.check_localrules() -def get_svm_file(wildcards, tier, name): - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, wildcards.timestamp +def get_input_par_file(wildcards, tier, name): + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" + pars_files_overwrite = Catalog.get_files( + par_overwrite_file, + wildcards.timestamp, ) for pars_file in pars_files_overwrite: - if name in pars_file: - return os.path.join(par_overwrite_path(setup), tier, pars_file) + if name in str(pars_file): + return Path(par_overwrite_path(setup)) / tier / pars_file raise ValueError(f"Could not find model in {pars_files_overwrite}") def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): - par_overwrite_file = os.path.join(par_overwrite_path(setup), tier, "validity.jsonl") + par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" if timestamp is not None: - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, timestamp + pars_files_overwrite = Catalog.get_files( + par_overwrite_file, + timestamp, ) else: - pars_files_overwrite = pars_catalog.get_calib_files( - par_overwrite_file, wildcards.timestamp + pars_files_overwrite = Catalog.get_files( + par_overwrite_file, + wildcards.timestamp, ) if name is None: - fullname = f"{tier}-overwrite.json" + fullname = f"{tier}-overwrite.yaml" else: - fullname = f"{tier}_{name}-overwrite.json" + fullname = f"{tier}_{name}-overwrite.yaml" out_files = [] for pars_file in pars_files_overwrite: - if fullname in pars_file: - out_files.append(os.path.join(par_overwrite_path(setup), tier, pars_file)) + if fullname in str(pars_file): + out_files.append(Path(par_overwrite_path(setup)) / tier / pars_file) if len(out_files) == 0: raise ValueError(f"Could not find name in {pars_files_overwrite}") else: diff --git a/rules/dsp.smk b/rules/dsp.smk index f8ea4a3..3fa105c 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -6,9 +6,10 @@ Snakemake rules for processing dsp tier. 
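# Illustrative sketch, not part of the patch: get_input_par_file and
# get_overwrite_file above both resolve the overwrite files valid at a timestamp
# (via Catalog.get_files on validity.yaml) and then select entries by name. A
# stripped-down version of that selection step only; `resolved` stands in for the
# catalog lookup result and the example paths are made up.
from pathlib import Path

def select_overwrites(resolved, base_dir, tier, name=None):
    """Keep resolved files whose name matches par_<tier>[_<name>]-overwrite.yaml."""
    fullname = f"{tier}-overwrite.yaml" if name is None else f"{tier}_{name}-overwrite.yaml"
    hits = [Path(base_dir) / tier / f for f in resolved if fullname in str(f)]
    if not hits:
        raise ValueError(f"could not find {fullname} in {resolved}")
    return hits

resolved = ["cal/p03/r000/l200-p03-r000-cal-T%-par_dsp_energy-overwrite.yaml"]
assert select_overwrites(resolved, "overrides", "dsp", "energy")[0].name.endswith(
    "par_dsp_energy-overwrite.yaml"
)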
This is done in 4 steps: - running dsp over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import par_dsp_path +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.create_pars_keylist import ParsKeyResolve from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -18,16 +19,20 @@ from scripts.util.patterns import ( get_pattern_pars_tmp, get_pattern_log, get_pattern_pars, - get_pattern_pars_overwrite, - get_pattern_pars_svm, ) -dsp_par_catalog = pars_key_resolve.get_par_catalog( +dsp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_dsp"], "lar": ["par_dsp"]}, ) +dsp_par_cat_file = Path(pars_path(setup)) / "dsp" / "validity.yaml" +if dsp_par_cat_file.is_file(): + dsp_par_cat_file.unlink() +Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) + rule build_pars_dsp_tau: input: @@ -218,14 +223,16 @@ rule build_pars_dsp_eopt: rule build_svm_dsp: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( + wildcards, "dsp", "svm_hyperpars" + ), + train_data=lambda wildcards: get_input_par_file( wildcards, "dsp", "svm_hyperpars" ).replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_dsp_svm").replace("{datatype}", "cal"), + str(get_pattern_log(setup, "pars_dsp_svm")).replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -288,7 +295,7 @@ rule build_pars_dsp_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -344,7 +351,7 @@ rule build_pars_dsp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, name="dplms", extension="lh5", @@ -385,7 +392,7 @@ rule build_dsp: input: raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), pars_file=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "dsp" ) ), diff --git a/rules/evt.smk b/rules/evt.smk index c760b54..91f04dd 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -2,13 +2,8 @@ Snakemake rules for processing evt tier. 
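# Illustrative sketch, not part of the patch: build_svm_dsp (and its psp analogue
# further down) derive the training-data file from the hyperparameter file by a
# plain string substitution. Path.replace() is a filesystem rename, not a string
# substitution, which is why the rules go through str()/.as_posix() before
# calling .replace(); the path below is made up.
from pathlib import Path

hyperpars = Path("overrides/dsp/cal/p03/r000/l200-p03-r000-cal-T%-par_dsp_svm_hyperpars.json")

train = hyperpars.as_posix().replace("hyperpars.json", "train.lh5")   # plain str
assert train.endswith("par_dsp_svm_train.lh5")

# Path's own helpers only cover whole-name or suffix swaps:
assert hyperpars.with_suffix(".lh5").name == hyperpars.name.replace(".json", ".lh5")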
""" -from scripts.util.pars_loading import pars_catalog +from scripts.util.pars_loading import ParsCatalog from scripts.util.patterns import ( - get_pattern_tier_hit, - get_pattern_tier_dsp, - get_pattern_tier_tcm, - get_pattern_tier_pht, - get_pattern_tier_psp, get_pattern_tier, get_pattern_log, get_pattern_pars, @@ -31,10 +26,10 @@ for tier in ("evt", "pet"): else get_pattern_tier(setup, "pht", check_in_cycle=False) ), tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), - xtalk_matrix=lambda wildcards: get_svm_file( + xtalk_matrix=lambda wildcards: get_input_par_file( tier=tier, wildcards=wildcards, name="xtc" ), - par_files=lambda wildcards: pars_catalog.get_par_file( + par_files=lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "pht" ), output: diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 557d492..cb27661 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -1,6 +1,6 @@ import glob -import json -import os +import json, yaml +from pathlib import Path from scripts.util.FileKey import FileKey, run_grouper from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind @@ -9,9 +9,20 @@ from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): ignore_keys = [] if ignore_keys_file is not None: - if os.path.isfile(ignore_keys_file): - with open(ignore_keys_file) as f: - ignore_keys = f.read().splitlines() + if Path(ignore_keys_file).is_file(): + if Path(ignore_keys_file).suffix == ".json": + with Path(ignore_keys_file).open() as f: + ignore_keys = json.load(f) + elif Path(ignore_keys_file).suffix == ".keylist": + with Path(ignore_keys_file).open() as f: + ignore_keys = f.read().splitlines() + elif Path(ignore_keys_file).suffix in (".yaml", ".yml"): + with Path(ignore_keys_file).open() as f: + ignore_keys = yaml.safe_load(f) + else: + raise Warning( + "ignore_keys_file file not in json, yaml or keylist format" + ) ignore_keys = [ key.split("#")[0].strip() if "#" in key else key.strip() for key in ignore_keys @@ -23,9 +34,16 @@ def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): ignore_keys = [] if analysis_runs_file is not None: - if os.path.isfile(analysis_runs_file): - with open(analysis_runs_file) as f: - analysis_runs = json.load(f) + if Path(analysis_runs_file).is_file(): + if Path(ignore_keys_file).suffix == ".json": + with Path(analysis_runs_file).open() as f: + analysis_runs = json.load(f) + elif Path(ignore_keys_file).suffix in (".yaml", ".yml"): + with Path(analysis_runs_file).open() as f: + analysis_runs = yaml.safe_load(f) + else: + raise Warning("analysis_runs file not in json or yaml format") + analysis_runs = [] else: analysis_runs = [] print("no analysis_runs file found") diff --git a/rules/hit.smk b/rules/hit.smk index f1bb0ba..af1fcaf 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -6,7 +6,9 @@ Snakemake rules for processing hit tier. 
This is done in 4 steps: - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -19,12 +21,18 @@ from scripts.util.patterns import ( get_pattern_pars, ) -hit_par_catalog = ds.pars_key_resolve.get_par_catalog( +hit_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_hit"], "lar": ["par_hit"]}, ) +hit_par_cat_file = Path(pars_path(setup)) / "hit" / "validity.yaml" +if hit_par_cat_file.is_file(): + hit_par_cat_file.unlink() +Path(hit_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(hit_par_catalog, hit_par_cat_file) + # This rule builds the qc using the calibration dsp files and fft files rule build_qc: @@ -72,7 +80,7 @@ rule build_energy_calibration: ), pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), ctc_dict=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "dsp" ) ), @@ -216,7 +224,7 @@ rule build_pars_hit_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -247,7 +255,7 @@ rule build_plts_hit: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, ), output: @@ -270,7 +278,7 @@ rule build_pars_hit: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts(setup, "hit"), @@ -297,7 +305,7 @@ rule build_pars_hit: rule build_hit: input: dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), - pars_file=lambda wildcards: pars_catalog.get_par_file( + pars_file=lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "hit" ), output: diff --git a/rules/pht.smk b/rules/pht.smk index 76542a3..dad1a24 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -6,9 +6,10 @@ Snakemake rules for processing pht (partition hit) tier data. 
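# Illustrative sketch, not part of the patch: dsp.smk and hit.smk above, and
# pht.smk/psp.smk further down, each refresh their tier's validity catalog with
# the same three steps (unlink a stale file, create the parent directory, write
# the new catalog). A generic helper showing that pattern; refresh_validity is
# hypothetical and only stands in for the ParsKeyResolve.write_to_yaml call.
from pathlib import Path

import yaml

def refresh_validity(pars_dir, entries):
    cat_file = Path(pars_dir) / "validity.yaml"
    if cat_file.is_file():                 # drop the catalog from a previous run
        cat_file.unlink()
    cat_file.parent.mkdir(parents=True, exist_ok=True)
    with cat_file.open("w") as f:          # stand-in for ParsKeyResolve.write_to_yaml
        yaml.dump(entries, f, sort_keys=False)
    return cat_file

# e.g. refresh_validity(Path(pars_path(setup)) / "hit", hit_par_catalog)
# using the names defined in the rules above.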
This is done in 4 - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -20,12 +21,18 @@ from scripts.util.patterns import ( get_pattern_pars, ) -pht_par_catalog = ds.pars_key_resolve.get_par_catalog( +pht_par_catalog = ds.ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_pht"], "lar": ["par_pht"]}, ) +pht_par_cat_file = Path(pars_path(setup)) / "pht" / "validity.yaml" +if pht_par_cat_file.is_file(): + pht_par_cat_file.unlink() +Path(pht_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(pht_par_catalog, pht_par_cat_file) + intier = "psp" @@ -50,7 +57,7 @@ for key, dataset in part.datasets.items(): cal_files=part.get_filelists(partition, key, intier), fft_files=part.get_filelists(partition, key, intier, datatype="fft"), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -207,7 +214,7 @@ rule build_per_energy_calibration: pht_dict=get_pattern_pars_tmp_channel(setup, "pht", "qc"), inplots=get_pattern_plts_tmp_channel(setup, "pht", "qc"), ctc_dict=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, intier ) ), @@ -258,7 +265,7 @@ for key, dataset in part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -440,7 +447,7 @@ for key, dataset in part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -620,7 +627,7 @@ for key, dataset in part.datasets.items(): input: files=part.get_filelists(partition, key, intier), pulser_files=[ - file.replace("par_pht", "par_tcm") + str(file).replace("par_pht", "par_tcm") for file in part.get_par_files( pht_par_catalog, partition, @@ -793,7 +800,7 @@ rule build_pars_pht_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -822,7 +829,7 @@ rule build_plts_pht: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, ), output: @@ -843,7 +850,7 @@ rule build_pars_pht: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, ), plts=get_pattern_plts(setup, "pht"), @@ -868,7 +875,7 @@ rule build_pars_pht: rule build_pht: input: dsp_file=get_pattern_tier(setup, intier, check_in_cycle=False), - pars_file=lambda wildcards: pars_catalog.get_par_file( + pars_file=lambda wildcards: 
ParsCatalog.get_par_file( setup, wildcards.timestamp, "pht" ), output: diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index 5672011..f83e534 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -1,6 +1,6 @@ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, diff --git a/rules/psp.smk b/rules/psp.smk index a959cf4..53e8f59 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -6,9 +6,10 @@ Snakemake rules for processing pht (partition hit) tier data. This is done in 4 - running build hit over all channels using par file """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import par_psp_path, par_dsp_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from scripts.util.utils import set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -20,12 +21,18 @@ from scripts.util.patterns import ( get_pattern_pars, ) -psp_par_catalog = pars_key_resolve.get_par_catalog( +psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(setup, "raw", check_in_cycle=False), {"cal": ["par_psp"], "lar": ["par_psp"]}, ) +psp_par_cat_file = Path(pars_path(setup)) / "psp" / "validity.yaml" +if psp_par_cat_file.is_file(): + psp_par_cat_file.unlink() +Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) +ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) + psp_rules = {} for key, dataset in part.datasets.items(): for partition in dataset.keys(): @@ -172,14 +179,18 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_svm_psp: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( + wildcards, "psp", "svm_hyperpars" + ), + train_data=lambda wildcards: get_input_par_file( wildcards, "psp", "svm_hyperpars" - ).replace("hyperpars.json", "train.lh5"), + ) + .as_posix() + .replace("hyperpars.json", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_psp_svm").replace("{datatype}", "cal"), + get_pattern_log(setup, "pars_psp_svm").as_posix().replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -221,7 +232,7 @@ rule build_pars_psp_objects: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, name="objects", extension="pkl", @@ -250,7 +261,7 @@ rule build_plts_psp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, ), output: @@ -271,7 +282,7 @@ rule build_pars_psp_db: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, - configs, + det_status, chan_maps, ), output: @@ -298,7 +309,7 @@ rule 
build_pars_psp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, name="dplms", extension="lh5", @@ -337,9 +348,9 @@ rule build_pars_psp: rule build_psp: input: - raw_file=get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), pars_file=ancient( - lambda wildcards: pars_catalog.get_par_file( + lambda wildcards: ParsCatalog.get_par_file( setup, wildcards.timestamp, "psp" ) ), diff --git a/rules/qc_phy.smk b/rules/qc_phy.smk index 5b9cd6f..b89d8d3 100644 --- a/rules/qc_phy.smk +++ b/rules/qc_phy.smk @@ -1,11 +1,10 @@ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import filelist_path, par_pht_path, set_last_rule_name +from scripts.util.pars_loading import ParsCatalog +from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.util.utils import filelist_path, set_last_rule_name from scripts.util.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_par_pht, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, @@ -138,7 +137,7 @@ rule build_plts_pht_phy: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="qcphy", ), @@ -160,7 +159,7 @@ rule build_pars_pht_phy: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, - configs, + det_status, chan_maps, name="qcphy", ), diff --git a/rules/raw.smk b/rules/raw.smk index a81520a..8239519 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -1,6 +1,5 @@ from scripts.util.patterns import ( get_pattern_tier_daq, - get_pattern_tier_raw, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, diff --git a/scripts/create_chankeylist.py b/scripts/create_chankeylist.py index 6ed4510..f01c879 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -4,7 +4,7 @@ from legendmeta import LegendMetadata, TextDB argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--det_status", help="det_status", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channelmap", help="Channel Map", type=str, required=True) @@ -12,8 +12,8 @@ argparser.add_argument("--output_file", help="output_file", type=str, required=True) args = argparser.parse_args() -configs = TextDB(args.configs, lazy=True) -status_map = configs.on(args.timestamp, system=args.datatype)["analysis"] +det_status = TextDB(args.det_status, lazy=True) +status_map = det_status.statuses.on(args.timestamp, system=args.datatype) channel_map = LegendMetadata(args.channelmap, lazy=True) chmap = channel_map.channelmaps.on(args.timestamp) @@ -23,7 +23,6 @@ for chan in status_map if status_map[chan]["processable"] is True and chmap[chan].system == "geds" ] - Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) with Path(args.output_file).open("w") as f: for chan in channels: diff --git a/scripts/util/FileKey.py b/scripts/util/FileKey.py index 9f646cc..ca4573c 100644 --- a/scripts/util/FileKey.py +++ b/scripts/util/FileKey.py @@ 
-57,6 +57,8 @@ def get_filekey_from_pattern(cls, filename, pattern=None): except AttributeError: key_pattern_rx = re.compile(smk.io.regex(cls.key_pattern)) else: + if isinstance(pattern, Path): + pattern = pattern.as_posix() try: key_pattern_rx = re.compile(smk.io.regex_from_filepattern(pattern)) except AttributeError: @@ -92,6 +94,8 @@ def parse_keypart(cls, keypart): return cls(**d) def get_path_from_filekey(self, pattern, **kwargs): + if isinstance(pattern, Path): + pattern = pattern.as_posix() if kwargs is None: return smk.io.expand(pattern, **self._asdict()) else: @@ -163,6 +167,8 @@ def name(self): return f"{super().name}-{self.processing_step}" def get_path_from_filekey(self, pattern, **kwargs): + if isinstance(pattern, Path): + pattern = pattern.as_posix() if not isinstance(pattern, str): pattern = pattern(self.tier, self.identifier) if kwargs is None: @@ -198,6 +204,8 @@ def _asdict(self): @staticmethod def get_channel_files(keypart, par_pattern, chan_list): + if isinstance(par_pattern, Path): + par_pattern = par_pattern.as_posix() d = ChannelProcKey.parse_keypart(keypart) filenames = [] for chan in chan_list: diff --git a/scripts/util/__init__.py b/scripts/util/__init__.py index 90b7204..caa4dd2 100644 --- a/scripts/util/__init__.py +++ b/scripts/util/__init__.py @@ -1,8 +1,8 @@ -from .CalibCatalog import CalibCatalog, Props, PropsStream -from .create_pars_keylist import pars_key_resolve -from .dataset_cal import dataset_file +from .cal_grouping import CalGrouping +from .catalog import Catalog, Props, PropsStream +from .create_pars_keylist import ParsKeyResolve from .FileKey import ChannelProcKey, FileKey, ProcessingFileKey -from .pars_loading import pars_catalog +from .pars_loading import ParsCatalog from .utils import ( runcmd, subst_vars, @@ -14,13 +14,13 @@ __all__ = [ "Props", "PropsStream", - "CalibCatalog", - "pars_key_resolve", - "dataset_file", + "Catalog", + "ParsKeyResolve", + "CalGrouping", "FileKey", "ProcessingFileKey", "ChannelProcKey", - "pars_catalog", + "ParsCatalog", "unix_time", "runcmd", "subst_vars_impl", diff --git a/scripts/util/cal_grouping.py b/scripts/util/cal_grouping.py index 651c137..e41d5c7 100644 --- a/scripts/util/cal_grouping.py +++ b/scripts/util/cal_grouping.py @@ -5,19 +5,26 @@ import json from pathlib import Path +import yaml + from .FileKey import ChannelProcKey, ProcessingFileKey from .patterns import ( get_pattern_log_channel, + get_pattern_pars, get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, ) from .utils import filelist_path -class cal_grouping: +class CalGrouping: def __init__(self, setup, input_file): - with Path(input_file).open() as r: - self.datasets = json.load(r) + if Path(input_file).suffix == ".json": + with Path(input_file).open() as r: + self.datasets = json.load(r) + elif Path(input_file).suffix in (".yaml", ".yml"): + with Path(input_file).open() as r: + self.datasets = yaml.safe_load(r) self.expand_runs() self.setup = setup @@ -28,7 +35,7 @@ def expand_runs(self): if isinstance(runs, str) and ".." 
in runs: start, end = runs.split("..") self.datasets[channel][part][per] = [ - f"r{x:02}" for x in range(int(start[2:]), int(end) + 1) + f"r{x:03}" for x in range(int(start[1:]), int(end[1:]) + 1) ] def get_dataset(self, dataset, channel): @@ -49,7 +56,8 @@ def get_filelists(self, dataset, channel, tier, experiment="l200", datatype="cal else: files += [ Path(filelist_path(self.setup)) - / "all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist" + / f"all-{experiment}-{per}-{run}-{datatype}-{tier}.filelist" + for run in dataset[per] ] return files @@ -62,14 +70,19 @@ def get_par_files( experiment="l200", datatype="cal", name=None, - extension="json", + extension="yaml", ): dataset = self.get_dataset(dataset, channel) all_par_files = [] for item in catalog: par_files = item.apply for par_file in par_files: - if par_file.split("-")[-1] == f"par_{tier}.json": + if ( + par_file.split("-")[-1] + == str(get_pattern_pars(self.setup, tier, check_in_cycle=False).name).split( + "-" + )[-1] + ): all_par_files.append(par_file) if channel == "default": channel = "{channel}" @@ -117,7 +130,12 @@ def get_plt_files( for item in catalog: par_files = item.apply for par_file in par_files: - if par_file.split("-")[-1] == f"par_{tier}.json": + if ( + par_file.split("-")[-1] + == str(get_pattern_pars(self.setup, tier, check_in_cycle=False).name).split( + "-" + )[-1] + ): all_par_files.append(par_file) if channel == "default": channel = "{channel}" @@ -201,6 +219,6 @@ def get_wildcard_constraints(self, dataset, channel): out_string = "" for channel in exclude_chans: out_string += f"(?!{channel})" - return out_string + r"ch\d{7}" + return out_string + r"^[VPCB]\d{1}\w{5}$" else: - return r"ch\d{7}" + return r"^[VPCB]\d{1}\w{5}$" diff --git a/scripts/util/catalog.py b/scripts/util/catalog.py index 390a7c1..9ec9b80 100644 --- a/scripts/util/catalog.py +++ b/scripts/util/catalog.py @@ -79,7 +79,7 @@ class PropsStream: @staticmethod def get(value): - if isinstance(value, str): + if isinstance(value, (str, Path)): return PropsStream.read_from(value) if isinstance(value, (collections.abc.Sequence, types.GeneratorType)): diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index f347975..c3e1f22 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -14,7 +14,7 @@ from .patterns import par_validity_pattern -class pars_key_resolve: +class ParsKeyResolve: def __init__(self, valid_from, category, apply): self.valid_from = valid_from @@ -70,7 +70,7 @@ def generate_par_keylist(keys): keys = sorted(keys, key=FileKey.get_unix_timestamp) keylist.append(keys[0]) for key in keys[1:]: - matched_key = pars_key_resolve.match_keys(keylist[-1], key) + matched_key = ParsKeyResolve.match_keys(keylist[-1], key) if matched_key not in keylist: keylist.append(matched_key) else: @@ -89,10 +89,10 @@ def match_entries(entry1, entry2): @staticmethod def match_all_entries(entrylist, name_dict): out_list = [] - out_list.append(pars_key_resolve.from_filekey(entrylist[0], name_dict)) + out_list.append(ParsKeyResolve.from_filekey(entrylist[0], name_dict)) for entry in entrylist[1:]: - new_entry = pars_key_resolve.from_filekey(entry, name_dict) - pars_key_resolve.match_entries(out_list[-1], new_entry) + new_entry = ParsKeyResolve.from_filekey(entry, name_dict) + ParsKeyResolve.match_entries(out_list[-1], new_entry) out_list.append(new_entry) return out_list @@ -100,14 +100,17 @@ def match_all_entries(entrylist, name_dict): def get_keys(keypart, search_pattern): d = 
FileKey.parse_keypart(keypart) try: - tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(search_pattern)) + tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(str(search_pattern))) + except AttributeError: - tier_pattern_rx = re.compile(smk.io.regex(search_pattern)) + tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] - files = Path(fn_glob_pattern).glob() + p = Path(fn_glob_pattern) + parts = p.parts[p.is_absolute() :] + files = Path(p.root).glob(str(Path(*parts))) keys = [] for f in files: - m = tier_pattern_rx.match(f) + m = tier_pattern_rx.match(str(f)) if m is not None: d = m.groupdict() key = FileKey(**d) @@ -118,19 +121,19 @@ def get_keys(keypart, search_pattern): def get_par_catalog(keypart, search_patterns, name_dict): if isinstance(keypart, str): keypart = [keypart] - if isinstance(search_patterns, str): + if isinstance(search_patterns, (str, Path)): search_patterns = [search_patterns] keylist = [] for search_pattern in search_patterns: for keypar in keypart: - keylist += pars_key_resolve.get_keys(keypar, search_pattern) + keylist += ParsKeyResolve.get_keys(keypar, search_pattern) if len(keylist) != 0: keys = sorted(keylist, key=FileKey.get_unix_timestamp) - keylist = pars_key_resolve.generate_par_keylist(keys) + keylist = ParsKeyResolve.generate_par_keylist(keys) - entrylist = pars_key_resolve.match_all_entries(keylist, name_dict) + entrylist = ParsKeyResolve.match_all_entries(keylist, name_dict) else: msg = "No Keys found" warnings.warn(msg, stacklevel=0) - entrylist = [pars_key_resolve("00000000T000000Z", "all", [])] + entrylist = [ParsKeyResolve("00000000T000000Z", "all", [])] return entrylist diff --git a/scripts/util/pars_loading.py b/scripts/util/pars_loading.py index a21f6ae..137ae03 100644 --- a/scripts/util/pars_loading.py +++ b/scripts/util/pars_loading.py @@ -12,7 +12,7 @@ from .utils import get_pars_path, par_overwrite_path -class pars_catalog(Catalog): +class ParsCatalog(Catalog): @staticmethod def match_pars_files(filelist1, filelist2): for file2 in filelist2: @@ -30,11 +30,11 @@ def match_pars_files(filelist1, filelist2): @staticmethod def get_par_file(setup, timestamp, tier): par_file = Path(get_pars_path(setup, tier)) / "validity.yaml" - pars_files = pars_catalog.get_calib_files(par_file, timestamp) + pars_files = ParsCatalog.get_files(par_file, timestamp) par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" - pars_files_overwrite = pars_catalog.get_calib_files(par_overwrite_file, timestamp) + pars_files_overwrite = ParsCatalog.get_files(par_overwrite_file, timestamp) if len(pars_files_overwrite) > 0: - pars_files, pars_files_overwrite = pars_catalog.match_pars_files( + pars_files, pars_files_overwrite = ParsCatalog.match_pars_files( pars_files, pars_files_overwrite ) pars_files = [Path(get_pars_path(setup, tier)) / file for file in pars_files] diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index cae1cd0..2418ead 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -14,7 +14,6 @@ tier_daq_path, tier_path, tier_raw_blind_path, - tier_skm_path, tmp_log_path, tmp_par_path, tmp_plts_path, @@ -91,28 +90,26 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" - + f"{tier}.lh5" + / ("{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5") ) elif tier in ["evt_concat", "pet_concat"]: 
file_pattern = ( Path(get_tier_path(setup, tier[:3])) / "{datatype}" - / "{experiment}-{period}-{run}-{datatype}-tier_" - + f"{tier[:3]}.lh5" + / ("{experiment}-{period}-{run}-{datatype}-tier_" + f"{tier[:3]}.lh5") ) elif tier == "skm": file_pattern = ( - Path(f"{tier_skm_path(setup)}") + Path(f"{get_tier_path(setup, tier)}") / "phy" / "{experiment}-{period}-{run}-{datatype}-tier_skm.lh5" ) else: msg = "invalid tier" raise Exception(msg) - if tier_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: - return "/tmp/" + Path(file_pattern).name + if tier_path(setup) not in str(file_pattern.resolve(strict=False)) and check_in_cycle is True: + return "/tmp/" + file_pattern.name else: return file_pattern @@ -125,8 +122,10 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-par_" - + f"{tier}_{name}.{extension}" + / ( + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}_{name}.{extension}" + ) ) else: file_pattern = ( @@ -134,19 +133,21 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-par_" - + f"{tier}.{extension}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}") ) else: msg = "invalid tier" raise Exception(msg) - if pars_path(setup) not in Path(file_pattern).resolve(strict=False) and check_in_cycle is True: + if ( + pars_path(setup) not in str(Path(file_pattern).resolve(strict=False)) + and check_in_cycle is True + ): if name is None: return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{extension}" else: return ( "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" - + f"par_{tier}_{name}.{extension}" + f"par_{tier}_{name}.{extension}" ) else: return file_pattern @@ -160,8 +161,7 @@ def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-" - + f"par_{tier}_{name}.{ext}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{ext}") ) else: return ( @@ -170,8 +170,7 @@ def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-" - + f"par_{tier}.{ext}" + / ("{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{ext}") ) @@ -183,8 +182,10 @@ def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - f"{tier}_{name}-overwrite.{extension}" + / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + f"{tier}_{name}-overwrite.{extension}" + ) ) else: return ( @@ -193,9 +194,11 @@ def get_pattern_pars_overwrite(setup, tier, name=None, extension="yaml"): / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" - + tier - + f"-overwrite.{extension}" + / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}-par_" + + tier + + f"-overwrite.{extension}" + ) ) @@ -203,15 +206,12 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" if datatype is None: datatype = "{datatype}" if name is None: - return ( - Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" - + datatype - + "-{timestamp}-par_" - + f"{tier}.{extension}" + return Path(f"{tmp_par_path(setup)}") / ( + 
"{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + f"{tier}.{extension}" ) else: - return ( - Path(f"{tmp_par_path(setup)}") / "{experiment}-{period}-{run}-" + return Path(f"{tmp_par_path(setup)}") / ( + "{experiment}-{period}-{run}-" + datatype + "-{timestamp}" + f"par_{tier}_{name}.{extension}" @@ -220,32 +220,24 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: - return ( - Path(f"{tmp_par_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" - + f"{tier}.{extension}" + return Path(f"{tmp_par_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}.{extension}" ) else: - return ( - Path(f"{tmp_par_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + return Path(f"{tmp_par_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}_{name}.{extension}" ) def get_pattern_plts_tmp_channel(setup, tier, name=None): if name is None: - return ( - Path(f"{tmp_plts_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" - + tier - + ".pkl" + return Path(f"{tmp_plts_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + tier + ".pkl" ) else: - return ( - Path(f"{tmp_plts_path(setup)}") - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" - + f"{tier}_{name}.pkl" + return Path(f"{tmp_plts_path(setup)}") / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl" ) @@ -257,9 +249,7 @@ def get_pattern_plts(setup, tier, name=None): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" - + tier - + ".dir" + / ("{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + ".dir") ) else: return ( @@ -268,11 +258,7 @@ def get_pattern_plts(setup, tier, name=None): / "cal" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-cal-{timestamp}-plt_" - + tier - + "_" - + name - + ".dir" + / ("{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + "_" + name + ".dir") ) @@ -280,9 +266,7 @@ def get_pattern_log(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / "{experiment}-{period}-{run}-{datatype}-{timestamp}-" - + processing_step - + ".log" + / ("{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log") ) @@ -290,9 +274,7 @@ def get_pattern_log_channel(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" - + processing_step - + ".log" + / ("{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log") ) @@ -300,7 +282,5 @@ def get_pattern_log_concat(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / "{experiment}-{period}-{run}-{datatype}-" - + processing_step - + ".log" + / ("{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log") ) diff --git a/scripts/util/utils.py b/scripts/util/utils.py index fd433c7..319eaa6 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -114,6 +114,10 @@ def chan_map_path(setup): return setup["paths"]["chan_map"] +def det_status_path(setup): + return setup["paths"]["detector_status"] + + def metadata_path(setup): return setup["paths"]["metadata"] From bbf65e90c9b4ead350b3761de17a473e9b2034fc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Nov 2024 
15:14:35 +0100 Subject: [PATCH 011/101] move info from readme to docs --- README.md | 112 ------------------------------------ docs/Makefile | 21 +++++++ docs/source/developer.rst | 15 +++++ docs/source/index.rst | 41 +++++++++++++ docs/source/user_manual.rst | 98 +++++++++++++++++++++++++++++++ 5 files changed, 175 insertions(+), 112 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/source/developer.rst create mode 100644 docs/source/index.rst create mode 100644 docs/source/user_manual.rst diff --git a/README.md b/README.md index 2459337..3565167 100644 --- a/README.md +++ b/README.md @@ -3,115 +3,3 @@ Implementation of an automatic data processing flow for L200 data, based on [Snakemake](https://snakemake.readthedocs.io/). - - -## Configuration - -Data processing resources are configured via a single site-dependent (and -possibly user-dependent) configuration file, named `config.json` in the -following. You may choose an arbitrary name, though. - -Use the included [templates/config.json](templates/config.json) as a template -and adjust the data base paths as necessary. Note that, when running Snakemake, -the default path to the config file is `./config.json`. - - -## Key-Lists - -Data generation is based on key-lists, which are flat text files -(extension ".keylist") containing one entry of the form -`{experiment}-{period}-{run}-{datatype}-{timestamp}` per line. - -Key-lists can be auto-generated based on the available DAQ files -using Snakemake targets of the form - -* `all-{experiment}.keylist` -* `all-{experiment}-{period}.keylist` -* `all-{experiment}-{period}-{run}.keylist` -* `all-{experiment}-{period}-{run}-{datatype}.keylist` - -which will generate the list of available file keys for all l200 files, resp. -a specific period, or a specific period and run, etc. - -For example: -```shell -$ snakemake all-l200-myper.keylist -``` -will generate a key-list with all files regarding period `myper`. - - -## File-Lists - -File-lists are flat files listing output files that should be generated, -with one file per line. A file-list will typically be generated for a given -data tier from a key-list, using the Snakemake targets of the form -`{label}-{tier}.filelist` (generated from `{label}.keylist`). - -For file lists based on auto-generated key-lists like -`all-{experiment}-{period}-{tier}.filelist`, the corresponding key-list -(`all-{experiment}-{period}.keylist` in this case) will be created -automatically, if it doesn't exist. - -Example: -```shell -$ snakemake all-mydet-mymeas-tier2.filelist -``` - -File-lists may of course also be derived from custom keylists, generated -manually or by other means, e.g. `my-dataset-raw.filelist` will be -generated from `my-dataset.keylist`. - - -## Main output generation - -Usually, the main output will be determined by a file-list, resp. a key-list -and data tier. The special output target `{label}-{tier}.gen` is used to -generate all files listed in `{label}-{tier}.filelist`. After the files -are created, the empty file `{label}-{tier}.filelist` will be created to -mark the successful data production. - -Snakemake targets like `all-{experiment}-{period}-{tier}.gen` may be used -to automatically generate key-lists and file-lists (if not already present) -and produce all possible output for the given data tier, based on available -tier0 files which match the target. 
- -Example: -```shell -$ snakemake all-mydet-mymeas-tier2.gen -``` -Targets like `my-dataset-raw.gen` (derived from a key-list -`my-dataset.keylist`) are of course allowed as well. - - -## Monitoring - -Snakemake supports monitoring by connecting to a -[panoptes](https://github.com/panoptes-organization/panoptes) server. - -Run (e.g.) -```shell -$ panoptes --port 5000 -``` -in the background to run a panoptes server instance, which comes with a -GUI that can be accessed with a web-brower on the specified port. - -Then use the Snakemake option `--wms-monitor` to instruct Snakemake to push -progress information to the panoptes server: -```shell -snakemake --wms-monitor http://127.0.0.1:5000 [...] -``` - -## Using software containers - -This dataflow doesn't use Snakemake's internal Singularity support, but -instead supports Singularity containers via -[`venv`](https://github.com/oschulz/singularity-venv) environments -for greater control. - -To use this, the path to `venv` and the name of the environment must be set -in `config.json`. - -This is only relevant then running Snakemake *outside* of the software -container, e.g. then using a batch system (see below). If Snakemake -and the whole workflow is run inside of a container instance, no -container-related settings in `config.json` are required. diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..9be493d --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,21 @@ +SHELL := /bin/bash +SOURCEDIR = source +BUILDDIR = build + +all: apidoc + sphinx-build -M html "$(SOURCEDIR)" "$(BUILDDIR)" -W --keep-going + +apidoc: clean-apidoc + sphinx-apidoc \ + --private \ + --module-first \ + --force \ + --output-dir "$(SOURCEDIR)/api" \ + ../scripts \ + ../rules + +clean-apidoc: + rm -rf "$(SOURCEDIR)/api" + +clean: clean-apidoc + rm -rf "$(BUILDDIR)" diff --git a/docs/source/developer.rst b/docs/source/developer.rst new file mode 100644 index 0000000..b6d7560 --- /dev/null +++ b/docs/source/developer.rst @@ -0,0 +1,15 @@ +Developers Guide +================ + +Snakemake is configured around a series of rules which specify how to generate a file or files from a set of input files. +These rules are defined in the ``Snakefile`` and in the files in the ``rules`` directory. +In general the structure is that a series of rules are defined to run on some calibration data, generating +a final ``par_{tier}.yaml`` file at the end which can be used by the ``tier`` rule to generate all the files in the tier. +For most rules there are two versions: the basic version and the partition version, where the first uses a single run +while the latter groups many runs together. +This grouping is defined in the ``cal_grouping.yaml`` file in the `legend-datasets `_ repository. +Each rule specifies its inputs and outputs along with how to generate the output, which can be +a shell command or a call to a Python function. These scripts are stored in the ``scripts`` directory. +Additional parameters can also be defined. +Full details can be found in the `Snakemake documentation <https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html>`_. diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..8534e71 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,41 @@ +Welcome to legend-dataflow's documentation! +=========================================== + +*legend-dataflow* is a Python package based on Snakemake ``_ +for running the data production of LEGEND. 
+It is designed to calibrate and optimise hundreds of channels in parallel before +bringing them all together to process the data. It takes as an input the metadata +at `legend metadata `_. + +Getting started +--------------- + +It is recommended to install and use the package through the `legend-prodenv `_. + +Next steps +---------- + +.. toctree:: + :maxdepth: 1 + + Package API reference + +.. toctree:: + :maxdepth: 1 + + tutorials + +.. toctree:: + :maxdepth: 1 + :caption: Related projects + + LEGEND Data Objects + Decoding Digitizer Data + Digital Signal Processing + Pygama + +.. toctree:: + :maxdepth: 1 + :caption: Development + + Source Code diff --git a/docs/source/user_manual.rst b/docs/source/user_manual.rst new file mode 100644 index 0000000..fb3e81b --- /dev/null +++ b/docs/source/user_manual.rst @@ -0,0 +1,98 @@ +Configuration +============= + +Data processing resources are configured via a single site-dependent (and +possibly user-dependent) configuration file, generally named ``config.json``, +although you can choose an arbitrary name. + +A template for this file is located at ``templates/config.json``, +which can be copied to the working directory +and the paths adjusted as necessary. Note that, when running Snakemake, +the default path to the config file is ``./config.json``. + +Profiles +======== + +A number of profiles are also included in the ``profiles`` directory. If none are specified, +the default profile is used. The profile can be specified by using the ``--profile`` option +when running Snakemake. These control how many jobs are run simultaneously, based on how many cores +are specified and the memory constraints of the system. A full list of all the options +that can be passed to Snakemake can be found at `snakemake `_. + + +Running the Dataflow +==================== + +To run the dataflow at the most basic level all that is necessary is to tell Snakemake which target file +to generate. In a simple case this may just be a single file, e.g. +```shell +$ snakemake /data2/public/prodenv/prod-blind/ref-v1.0.0/generated/tier/dsp/p03/r000/l200-p03-r000-cal-20230401T000000Z-tier_dsp.lh5 +``` +This would generate the file and all the files that are required to generate it. +In general though we want to generate a large number of files, and we can do this using the ``gen`` target. + +Main output generation +====================== + +Usually, the main output will be determined by a file-list. +The special output target ``{label}-{tier}.gen`` is used to +generate all files that follow the label up to the specified tier. +The label is composed of the following parts: +- the filelist designator: in most cases this will be ``all``, but other options are specified in the ``runlists.yaml`` file +in the `legend-datasets `_ repository. +- experiment: the experiment name i.e. l200 +- period: the period of the data e.g. p03 +- run: the run number e.g. r000 +- datatype: the data type e.g. cal +- timestamp: the timestamp of the data e.g. 20230401T000000Z + +Example: +```shell +$ snakemake all-l200-p03-r001-cal-20230401T000000Z-dsp.gen +``` + +You can specify as many or as few of these as you like, e.g. ``all-l200-p03-dsp.gen``. +If you want to specify a lower part of the label but leave a higher part free, +you can use the ``*`` character, e.g. ``all-l200-p03-*-cal-dsp.gen``. +Additionally, if you want to specify multiple options for a part of the label, you can use the ``_`` character between them, +e.g. ``all-l200-p03-r000_r001-dsp.gen``. 
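+ +As a minimal illustration (the period and run numbers here are only hypothetical instances of the label forms described above), such targets are invoked like any other ``gen`` target: +```shell +# all cal dsp files of period p03, any run (hypothetical example) +$ snakemake all-l200-p03-*-cal-dsp.gen +# dsp files for runs r000 and r001 of period p03 (hypothetical example) +$ snakemake all-l200-p03-r000_r001-dsp.gen +``` 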
+ +After the files +are created, the empty file ``{label}-{tier}.gen```` will be created to +mark the successful data production. + + +Monitoring +========== + +Snakemake supports monitoring by connecting to a +`panoptes `_ server. + +Run (e.g.) +```shell +$ panoptes --port 5000 +``` +in the background to run a panoptes server instance, which comes with a +GUI that can be accessed with a web-brower on the specified port. + +Then use the Snakemake option ``--wms-monitor`` to instruct Snakemake to push +progress information to the panoptes server: +```shell +snakemake --wms-monitor http://127.0.0.1:5000 [...] +``` + +Using software containers +========================= + +This dataflow doesn't use Snakemake's internal Singularity support, but +instead supports Singularity containers via +`venv `_ environments +for greater control. + +To use this, the path to ``venv`` and the name of the environment must be set +in ``config.json``. + +This is only relevant then running Snakemake *outside* of the software +container, e.g. when using a batch system (see below). If Snakemake +and the whole workflow is run inside of a container instance, no +container-related settings in ``config.json`` are required. From 9639200d37d4039bd74460d19665acedccdfc2c4 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 3 Dec 2024 22:46:01 +0100 Subject: [PATCH 012/101] add ability to specify different file selections and cleanup --- rules/filelist_gen.smk | 127 ++++++++++++++++++++++++++++------------- 1 file changed, 86 insertions(+), 41 deletions(-) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index cb27661..d0356a8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -5,9 +5,34 @@ from pathlib import Path from scripts.util.FileKey import FileKey, run_grouper from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind - -def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): +concat_datatypes = ["phy"] +concat_tiers = ["skm", "pet_concat", "evt_concat"] +blind_datatypes = ["phy"] + + +def expand_runs(in_dict): + """ + This function expands out the runs if a range is specified in the dictionary + e.g. + { + "p01": "r001..r005" + } + """ + for per, run_list in in_dict.items(): + if isinstance(run_list, str) and ".." 
in runs: + start, end = runs.split("..") + in_dict[per] = [f"r{x:03}" for x in range(int(start[1:]), int(end[1:]) + 1)] + return in_dict + + +def get_analysis_runs( + ignore_keys_file=None, analysis_runs_file=None, file_selection="all" +): + """ + This function reads in the ignore_keys and analysis_runs files and returns the dictionaries + """ ignore_keys = [] + analysis_runs = {} if ignore_keys_file is not None: if Path(ignore_keys_file).is_file(): if Path(ignore_keys_file).suffix == ".json": @@ -20,20 +45,18 @@ def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): with Path(ignore_keys_file).open() as f: ignore_keys = yaml.safe_load(f) else: - raise Warning( + raise ValueError( "ignore_keys_file file not in json, yaml or keylist format" ) - ignore_keys = [ + ignore_keys = [ # remove any comments in the keylist key.split("#")[0].strip() if "#" in key else key.strip() for key in ignore_keys ] else: - print("no ignore_keys.keylist file found") - ignore_keys = [] - else: - ignore_keys = [] + msg = f"no ignore_keys file found: {ignore_keys_file}" + raise ValueError(msg) - if analysis_runs_file is not None: + if analysis_runs_file is not None and file_selection != "all": if Path(analysis_runs_file).is_file(): if Path(ignore_keys_file).suffix == ".json": with Path(analysis_runs_file).open() as f: @@ -42,13 +65,18 @@ def get_analysis_runs(ignore_keys_file=None, analysis_runs_file=None): with Path(analysis_runs_file).open() as f: analysis_runs = yaml.safe_load(f) else: - raise Warning("analysis_runs file not in json or yaml format") - analysis_runs = [] + msg = f"analysis_runs file not in json or yaml format: {analysis_runs_file}" + raise ValueError(msg) + if file_selection in analysis_runs: + analysis_runs = expand_runs( + analysis_runs[file_selection] + ) # select the file_selection and expand out the runs + else: + msg = f"Unknown file selection: {file_selection} not in {list(analysis_runs)}" + raise ValueError(msg) else: - analysis_runs = [] - print("no analysis_runs file found") - else: - analysis_runs = [] + msg = f"no analysis_runs file found: {analysis_runs_file}" + raise ValueError(msg) return analysis_runs, ignore_keys @@ -75,9 +103,14 @@ def get_keys(keypart): def get_pattern(setup, tier): + """ + Helper function to get the search pattern for the given tier, + some tiers such as skm need to refer to a different pattern when looking for files + as only phy files are taken to skm others are only taken to pet + """ if tier == "blind": fn_pattern = get_pattern_tier(setup, "raw", check_in_cycle=False) - elif tier == "skm" or tier == "pet_concat": + elif tier in ("skm", "pet_concat"): fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) elif tier == "evt_concat": fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) @@ -87,6 +120,9 @@ def get_pattern(setup, tier): def concat_phy_filenames(setup, phy_filenames, tier): + """ + This function concatenates the files from the same run together + """ fn_pattern = get_pattern(setup, tier) # group files by run sorted_phy_filenames = run_grouper(phy_filenames) @@ -110,18 +146,20 @@ def build_filelist( tier, ignore_keys=None, analysis_runs=None, - file_selection="all", ): + """ + This function builds the filelist for the given filekeys, search pattern and tier. 
+ It will ignore any keys in the ignore_keys list and only include the keys specified in the analysis_runs dict + """ fn_pattern = get_pattern(setup, tier) if ignore_keys is None: ignore_keys = [] if analysis_runs is None: - analysis_runs = [] + analysis_runs = {} phy_filenames = [] other_filenames = [] - for key in filekeys: fn_glob_pattern = key.get_path_from_filekey(search_pattern)[0] files = glob.glob(fn_glob_pattern) @@ -131,7 +169,7 @@ def build_filelist( if _key.name in ignore_keys: pass else: - if tier == "blind" and _key.datatype == "phy": + if tier == "blind" and _key.datatype in blind_datatypes: filename = FileKey.get_path_from_filekey( _key, get_pattern_tier_raw_blind(setup) ) @@ -142,32 +180,38 @@ def build_filelist( else: filename = FileKey.get_path_from_filekey(_key, fn_pattern) - if file_selection == "all": - if _key.datatype == "phy": + if analysis_runs == {}: + if ( + _key.datatype in concat_datatypes + ): # separate out phy files as some tiers these are concatenated phy_filenames += filename else: other_filenames += filename - elif file_selection == "sel": - if analysis_runs == "all" or ( - _key.period in analysis_runs + else: + if ( + _key.period + in analysis_runs # check if period in analysis_runs dicts and ( - _key.run in analysis_runs[_key.period] - or analysis_runs[_key.period] == "all" + _key.run + in analysis_runs[ + _key.period + ] # check if run in analysis_runs dicts + or analysis_runs[_key.period] + == "all" # or if runs is just specified as "all" ) ): - if _key.datatype == "phy": - phy_filenames += filename + if _key.datatype in concat_datatypes: + phy_filenames += filename # separate out phy files as some tiers these are concatenated else: other_filenames += filename - else: - msg = "unknown file selection" - raise ValueError(msg) phy_filenames = sorted(phy_filenames) other_filenames = sorted(other_filenames) - if tier == "skm" or tier == "pet_concat" or tier == "evt_concat": - phy_filenames = concat_phy_filenames(setup, phy_filenames, tier) + if tier in concat_tiers: + phy_filenames = concat_phy_filenames( + setup, phy_filenames, tier + ) # concat phy files return phy_filenames + other_filenames @@ -175,10 +219,11 @@ def build_filelist( def get_filelist( wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): - file_selection = wildcards.label[:3] - keypart = wildcards.label[3:] - - analysis_runs, ignore_keys = get_analysis_runs(ignore_keys_file, analysis_runs_file) + file_selection = wildcards.label.split("-", 1)[0] + keypart = f'-{wildcards.label.split("-", 1)[1]}' # remove the file selection from the keypart + analysis_runs, ignore_keys = get_analysis_runs( + ignore_keys_file, analysis_runs_file, file_selection + ) filekeys = get_keys(keypart) @@ -189,7 +234,6 @@ def get_filelist( wildcards.tier, ignore_keys, analysis_runs, - file_selection, ) @@ -204,7 +248,9 @@ def get_filelist_full_wildcards( ): keypart = f"-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-{wildcards.datatype}" - analysis_runs, ignore_keys = get_analysis_runs(ignore_keys_file, analysis_runs_file) + analysis_runs, ignore_keys = get_analysis_runs( + ignore_keys_file, analysis_runs_file, file_selection + ) filekeys = get_keys(keypart) return build_filelist( @@ -214,5 +260,4 @@ def get_filelist_full_wildcards( tier, ignore_keys, analysis_runs, - file_selection, ) From 0cb28b69de8f30acf0b21fc272b9515293b2cf97 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 3 Dec 2024 22:49:33 +0100 Subject: [PATCH 013/101] updates for new meta, switch to detector 
keying in configs --- Snakefile | 23 ++++---- rules/dsp.smk | 37 ++++++++++-- rules/hit.smk | 9 +++ rules/pht.smk | 7 +++ rules/pht_fast.smk | 2 + rules/psp.smk | 13 +++-- rules/tcm.smk | 1 + scripts/build_dsp.py | 18 +++++- scripts/merge_channels.py | 48 ++++++++++++--- scripts/pars_dsp_dplms.py | 21 ++++--- scripts/pars_dsp_eopt.py | 24 ++++---- scripts/pars_dsp_event_selection.py | 19 +++--- scripts/pars_dsp_nopt.py | 17 +++--- scripts/pars_dsp_tau.py | 13 ++++- scripts/pars_hit_aoe.py | 20 +++++-- scripts/pars_hit_ecal.py | 16 ++--- scripts/pars_hit_lq.py | 29 +++++---- scripts/pars_hit_qc.py | 91 +++++++++++++++++++++-------- scripts/pars_pht_aoecal.py | 13 +++-- scripts/pars_pht_fast.py | 14 +++-- scripts/pars_pht_lqcal.py | 14 +++-- scripts/pars_pht_partcal.py | 22 +++---- scripts/pars_pht_qc.py | 37 ++++++------ scripts/pars_pht_qc_phy.py | 19 +++--- scripts/pars_tcm_pulser.py | 9 ++- scripts/util/convert_np.py | 14 +++++ 26 files changed, 385 insertions(+), 165 deletions(-) create mode 100644 scripts/util/convert_np.py diff --git a/Snakefile b/Snakefile index 39a3dee..0838a8c 100644 --- a/Snakefile +++ b/Snakefile @@ -133,15 +133,15 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # remove filelists - files = glob.glob(os.path.join(filelist_path(setup), "*")) - for file in files: - if os.path.isfile(file): - os.remove(file) - if os.path.exists(filelist_path(setup)): - os.rmdir(filelist_path(setup)) - - # remove logs + # # remove filelists + # files = glob.glob(os.path.join(filelist_path(setup), "*")) + # for file in files: + # if os.path.isfile(file): + # os.remove(file) + # if os.path.exists(filelist_path(setup)): + # os.rmdir(filelist_path(setup)) + + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): @@ -171,11 +171,12 @@ rule gen_filelist: analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - Path(filelist_path(setup)) / "{label}-{tier}.filelist", + temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), run: if len(input) == 0: print( - "WARNING: No files found for the given pattern\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" + f"WARNING: No files found for the given pattern:{wildcards.label}", + "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen", ) with open(output[0], "w") as f: for fn in input: diff --git a/rules/dsp.smk b/rules/dsp.smk index 3fa105c..34f7422 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -58,13 +58,14 @@ rule build_pars_dsp_tau: "{basedir}/../scripts/pars_dsp_tau.py " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " "--plot_path {output.plots} " "--output_file {output.decay_const} " "--pulser_file {input.pulser} " - "--raw_files {input.files}" + "--raw_files {input.files} " rule build_pars_event_selection: @@ -93,6 +94,7 @@ rule build_pars_event_selection: "{basedir}/../scripts/pars_dsp_event_selection.py " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -132,6 +134,7 @@ rule build_pars_dsp_nopt: "--database {input.database} " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -175,6 +178,7 @@ rule 
build_pars_dsp_dplms: "--inplots {input.inplots} " "--configs {configs} " "--log {log} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -210,6 +214,7 @@ rule build_pars_dsp_eopt: "{basedir}/../scripts/pars_dsp_eopt.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -226,9 +231,9 @@ rule build_svm_dsp: hyperpars=lambda wildcards: get_input_par_file( wildcards, "dsp", "svm_hyperpars" ), - train_data=lambda wildcards: get_input_par_file( - wildcards, "dsp", "svm_hyperpars" - ).replace("hyperpars.json", "train.lh5"), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "dsp", "svm_hyperpars") + ).replace("hyperpars.yaml", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: @@ -274,9 +279,12 @@ rule build_plts_dsp: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, ), + params: + timestamp="{timestamp}", + datatype="cal", output: get_pattern_plts(setup, "dsp"), group: @@ -286,6 +294,7 @@ rule build_plts_dsp: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_pars_dsp_objects: @@ -300,6 +309,9 @@ rule build_pars_dsp_objects: name="objects", extension="pkl", ), + params: + timestamp="{timestamp}", + datatype="cal", output: get_pattern_pars( setup, @@ -315,6 +327,8 @@ rule build_pars_dsp_objects: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " rule build_pars_dsp_db: @@ -324,9 +338,12 @@ rule build_pars_dsp_db: f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, - configs, + det_status, chan_maps, ), + params: + timestamp="{timestamp}", + datatype="cal", output: temp( get_pattern_pars_tmp( @@ -342,6 +359,8 @@ rule build_pars_dsp_db: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " rule build_pars_dsp: @@ -369,6 +388,9 @@ rule build_pars_dsp: extension="dir", check_in_cycle=check_in_cycle, ), + params: + timestamp="{timestamp}", + datatype="cal", output: out_file=get_pattern_pars( setup, @@ -386,6 +408,8 @@ rule build_pars_dsp: "--in_db {input.in_db} " "--out_db {output.out_db} " "--input {input.in_files} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " rule build_dsp: @@ -415,6 +439,7 @@ rule build_dsp: "{basedir}/../scripts/build_dsp.py " "--log {log} " f"--configs {ro(configs)} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--input {params.ro_input[raw_file]} " diff --git a/rules/hit.smk b/rules/hit.smk index af1fcaf..bb42651 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -44,6 +44,7 @@ rule build_qc: filelist_path(setup), "all-{experiment}-{period}-{run}-fft-dsp.filelist" ), pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + overwrite_files=lambda wildcards: get_overwrite_file("hit", wildcards), params: timestamp="{timestamp}", datatype="cal", @@ -65,11 +66,13 @@ rule build_qc: "--timestamp {params.timestamp} " "--channel {params.channel} " "--configs {configs} " + "--metadata {meta} " "--plot_path {output.plot_file} " "--save_path {output.qc_file} " "--pulser_file 
{input.pulser} " "--cal_files {input.files} " "--fft_files {input.fft_files} " + "--overwrite_files {input.overwrite_files} " # This rule builds the energy calibration using the calibration dsp files @@ -158,6 +161,7 @@ rule build_aoe_calibration: "{basedir}/../scripts/pars_hit_aoe.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -204,6 +208,7 @@ rule build_lq_calibration: "{basedir}/../scripts/pars_hit_lq.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -246,6 +251,7 @@ rule build_pars_hit_objects: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input} " "--output {output} " + "--channelmap {meta} " rule build_plts_hit: @@ -269,6 +275,7 @@ rule build_plts_hit: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input} " "--output {output} " + "--channelmap {meta} " rule build_pars_hit: @@ -300,6 +307,7 @@ rule build_pars_hit: "{basedir}/../scripts/merge_channels.py " "--input {params.ro_input[infiles]} " "--output {output} " + "--channelmap {meta} " rule build_hit: @@ -326,6 +334,7 @@ rule build_hit: "{swenv} python3 -B " "{basedir}/../scripts/build_hit.py " f"--configs {ro(configs)} " + "--metadata {meta} " "--log {log} " "--tier {params.tier} " "--datatype {params.datatype} " diff --git a/rules/pht.smk b/rules/pht.smk index dad1a24..e638832 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -129,6 +129,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_qc.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -181,6 +182,7 @@ rule build_pht_qc: "{basedir}/../scripts/pars_pht_qc.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -536,6 +538,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_aoecal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -596,6 +599,7 @@ rule build_pht_aoe_calibrations: "{basedir}/../scripts/pars_pht_aoecal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -714,6 +718,7 @@ for key, dataset in part.datasets.items(): "{basedir}/../scripts/pars_pht_lqcal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -769,6 +774,7 @@ rule build_pht_lq_calibration: "{basedir}/../scripts/pars_pht_lqcal.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -896,6 +902,7 @@ rule build_pht: "{swenv} python3 -B " "{basedir}/../scripts/build_hit.py " f"--configs {ro(configs)} " + "--metadata {meta} " "--log {log} " "--tier {params.tier} " "--datatype {params.datatype} " diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index f83e534..9369b6b 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -108,6 +108,7 @@ for key, dataset in part.datasets.items(): f"{basedir}/../scripts/pars_pht_fast.py " "--log {log} " 
"--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--inplots {input.inplots} " @@ -166,6 +167,7 @@ rule par_pht_fast: "{basedir}/../scripts/pars_pht_fast.py " "--log {log} " "--configs {configs} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " diff --git a/rules/psp.smk b/rules/psp.smk index 53e8f59..260be19 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -182,11 +182,9 @@ rule build_svm_psp: hyperpars=lambda wildcards: get_input_par_file( wildcards, "psp", "svm_hyperpars" ), - train_data=lambda wildcards: get_input_par_file( - wildcards, "psp", "svm_hyperpars" - ) - .as_posix() - .replace("hyperpars.json", "train.lh5"), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "psp", "svm_hyperpars") + ).replace("hyperpars.yaml", "train.lh5"), output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: @@ -252,6 +250,7 @@ rule build_pars_psp_objects: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_plts_psp: @@ -273,6 +272,7 @@ rule build_plts_psp: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_pars_psp_db: @@ -300,6 +300,7 @@ rule build_pars_psp_db: "{basedir}/../scripts/merge_channels.py " "--input {input} " "--output {output} " + "--channelmap {meta} " rule build_pars_psp: @@ -344,6 +345,7 @@ rule build_pars_psp: "--in_db {input.in_db} " "--out_db {output.out_db} " "--input {input.in_files} " + "--channelmap {meta} " rule build_psp: @@ -373,6 +375,7 @@ rule build_psp: "{basedir}/../scripts/build_dsp.py " "--log {log} " f"--configs {ro(configs)} " + "--metadata {meta} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--input {params.ro_input[raw_file]} " diff --git a/rules/tcm.smk b/rules/tcm.smk index c1164bb..e3a3410 100644 --- a/rules/tcm.smk +++ b/rules/tcm.smk @@ -66,3 +66,4 @@ rule build_pulser_ids: "--channel {params.channel} " "--tcm_files {params.input} " "--pulser_file {output.pulser} " + "--metadata {meta} " diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 02bf6a1..902ac4b 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -7,7 +7,7 @@ import numpy as np from dspeed import build_dsp -from legendmeta import TextDB +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 @@ -27,11 +27,15 @@ def replace_list_with_array(dic): argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) -argparser.add_argument("--log", help="log file", type=str) argparser.add_argument("--input", help="input file", type=str) + argparser.add_argument("--output", help="output file", type=str) argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() @@ -41,14 +45,22 @@ def replace_list_with_array(dic): logging.getLogger("numba").setLevel(logging.INFO) logging.getLogger("parse").setLevel(logging.INFO) 
logging.getLogger("lgdo").setLevel(logging.INFO) +logging.getLogger("legendmeta").setLevel(logging.INFO) log = logging.getLogger(__name__) +meta = LegendMetadata(path=args.metadata) +chan_map = meta.channelmap(args.timestamp, system=args.datatype) + + configs = TextDB(args.configs, lazy=True) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_dsp"][ "inputs" ]["processing_chain"] -channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} +channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) + for chan, file in channel_dict.items() +} db_files = [ par_file for par_file in args.pars_file if pathlib.Path(par_file).suffix in (".json", ".yaml") ] diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index e8994be..5fb6d68 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -4,6 +4,7 @@ from pathlib import Path import numpy as np +from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo import lh5 from util.FileKey import ChannelProcKey @@ -37,6 +38,19 @@ def replace_path(d, old_path, new_path): type=str, required=False, ) +argparser.add_argument( + "--channelmap", + help="channelmap", + type=str, + required=False, + default=None, +) +argparser.add_argument( + "--timestamp", + help="timestamp", + type=str, + required=False, +) args = argparser.parse_args() # change to only have 1 output file for multiple inputs @@ -46,6 +60,12 @@ def replace_path(d, old_path, new_path): file_extension = Path(args.output).suffix +if args.channelmap is not None: + channel_map = LegendMetadata(args.channelmap, lazy=True) + chmap = channel_map.channelmap(args.timestamp) +else: + chmap = None + if file_extension == ".dat" or file_extension == ".dir": out_file = Path(args.output).with_suffix("") else: @@ -61,9 +81,12 @@ def replace_path(d, old_path, new_path): for channel in channel_files: if Path(channel).suffix == file_extension: channel_dict = Props.read_from(channel) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + + channel_name = fkey.channel out_dict[channel_name] = channel_dict else: msg = "Output file extension does not match input file extension" @@ -79,7 +102,11 @@ def replace_path(d, old_path, new_path): with Path(channel).open("rb") as r: channel_dict = pkl.load(r) fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + + channel_name = fkey.channel out_dict[channel_name] = channel_dict with Path(temp_output).open("wb") as w: @@ -89,12 +116,16 @@ def replace_path(d, old_path, new_path): elif file_extension == ".dat" or file_extension == ".dir": common_dict = {} - with shelve.open(out_file, "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: + with shelve.open(str(out_file), "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: for channel in channel_files: with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel_files[0]).name) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + + channel_name = fkey.channel if isinstance(channel_dict, dict) and "common" in list(channel_dict): chan_common_dict = 
channel_dict.pop("common") common_dict[channel_name] = chan_common_dict @@ -109,8 +140,11 @@ def replace_path(d, old_path, new_path): for channel in channel_files: if Path(channel).suffix == file_extension: fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - channel_name = fkey.channel + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + channel_name = fkey.channel tb_in = lh5.read(f"{channel_name}", channel) lh5.write( diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 607613c..87403b8 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -15,10 +15,11 @@ argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) +argparser.add_argument("--database", help="database", type=str, required=True) argparser.add_argument("--log", help="log_file", type=str) -argparser.add_argument("--database", help="database", type=str, required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -42,6 +43,10 @@ log = logging.getLogger(__name__) sto = lh5.LH5Store() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(args.configs, lazy=True).on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["proc_chain"][args.channel] @@ -56,11 +61,9 @@ t0 = time.time() log.info("\nLoad fft data") - energies = sto.read(f"{args.channel}/raw/daqenergy", fft_files)[0] + energies = sto.read(f"{channel}/raw/daqenergy", fft_files)[0] idxs = np.where(energies.nda == 0)[0] - raw_fft = sto.read( - f"{args.channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs - )[0] + raw_fft = sto.read(f"{channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs)[0] t1 = time.time() log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") @@ -69,12 +72,12 @@ kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{args.channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda ids = np.isin(peaks, peaks_rounded) peaks = peaks[ids] idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - raw_cal = sto.read(f"{args.channel}/raw", args.peak_file, idx=ids)[0] + raw_cal = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}") if isinstance(dsp_config, (str, list)): @@ -107,7 +110,7 @@ dplms_pars = Table(col_dict={"coefficients": Array(coeffs)}) out_dict["dplms"][ "coefficients" - ] = f"loadlh5('{args.lh5_path}', '{args.channel}/dplms/coefficients')" + ] = f"loadlh5('{args.lh5_path}', '{channel}/dplms/coefficients')" log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") else: @@ -124,7 +127,7 @@ Path(args.lh5_path).parent.mkdir(parents=True, exist_ok=True) sto.write( 
Table(col_dict={"dplms": dplms_pars}), - name=args.channel, + name=channel, lh5_file=args.lh5_path, wo_mode="overwrite", ) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index bcda090..d4f0098 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -26,12 +26,12 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) - argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) -argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) argparser.add_argument("--log", help="log_file", type=str) +argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -58,6 +58,10 @@ sto = lh5.LH5Store() t0 = time.time() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_eopt"]["inputs"]["processing_chain"][ @@ -108,12 +112,12 @@ ) peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{args.channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda ids = np.isin(peaks, peaks_rounded) peaks = peaks[ids] idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - tb_data = sto.read(f"{args.channel}/raw", args.peak_file, idx=ids)[0] + tb_data = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] t1 = time.time() log.info(f"Data Loaded in {(t1-t0)/60} minutes") @@ -318,32 +322,32 @@ out_alpha_dict = {} out_alpha_dict["cuspEmax_ctc"] = { "expression": "cuspEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_cusp.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, } out_alpha_dict["cuspEftp_ctc"] = { "expression": "cuspEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_cusp.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, } out_alpha_dict["zacEmax_ctc"] = { "expression": "zacEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_zac.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, } out_alpha_dict["zacEftp_ctc"] = { "expression": "zacEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_zac.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, } out_alpha_dict["trapEmax_ctc"] = { "expression": "trapEmax*(1+dt_eff*a)", - "parameters": {"a": round(bopt_trap.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, } out_alpha_dict["trapEftp_ctc"] = { "expression": "trapEftp*(1+dt_eff*a)", - "parameters": {"a": round(bopt_trap.optimal_results["alpha"], 9)}, + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, } if "ctc_params" in db_dict: db_dict["ctc_params"].update(out_alpha_dict) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 
2e6505b..f4dfd7d 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -83,10 +83,11 @@ def get_out_data( argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--raw_cal", help="raw_cal", type=str, nargs="*", required=True) argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) @@ -108,6 +109,10 @@ def get_out_data( sto = lh5.LH5Store() t0 = time.time() + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"][ @@ -121,11 +126,11 @@ def get_out_data( db_dict = Props.read_from(args.decay_const) Path(args.peak_file).parent.mkdir(parents=True, exist_ok=True) + rng = np.random.default_rng() + rand_num = f"{rng.integers(0,99999):05d}" + temp_output = f"{args.peak_file}.{rand_num}" if peak_dict.pop("run_selection") is True: log.debug("Starting peak selection") - rng = np.random.default_rng() - rand_num = f"{rng.integers(0,99999):05d}" - temp_output = f"{args.peak_file}.{rand_num}" with Path(args.raw_filelist).open() as f: files = f.read().splitlines() @@ -141,13 +146,13 @@ def get_out_data( tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, peak_dict["pulser_multiplicity_threshold"] + tcm_files, channel, peak_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" raise ValueError(msg) - raw_dict = Props.read_from(args.raw_cal)[args.channel]["pars"]["operations"] + raw_dict = Props.read_from(args.raw_cal)[channel]["pars"]["operations"] peaks_kev = peak_dict["peaks"] kev_widths = peak_dict["kev_widths"] @@ -156,7 +161,7 @@ def get_out_data( final_cut_field = peak_dict["final_cut_field"] energy_parameter = peak_dict.get("energy_parameter", "trapTmax") - lh5_path = f"{args.channel}/raw" + lh5_path = f"{channel}/raw" if not isinstance(kev_widths, list): kev_widths = [kev_widths] diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 47261d2..5de3a59 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -20,6 +20,7 @@ argparser.add_argument("--inplots", help="inplots", type=str) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -44,6 +45,10 @@ t0 = time.time() +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + conf = LegendMetadata(path=args.configs) configs = conf.on(args.timestamp, system=args.datatype) dsp_config = 
configs["snakemake_rules"]["pars_dsp_nopt"]["inputs"]["processing_chain"][ @@ -61,9 +66,9 @@ raw_files = sorted(files) - energies = sto.read(f"{args.channel}/raw/daqenergy", raw_files)[0] + energies = sto.read(f"{channel}/raw/daqenergy", raw_files)[0] idxs = np.where(energies.nda == 0)[0] - tb_data = sto.read(f"{args.channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs)[0] + tb_data = sto.read(f"{channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs)[0] t1 = time.time() log.info(f"Time to open raw files {t1-t0:.2f} s, n. baselines {len(tb_data)}") @@ -72,7 +77,7 @@ cut_dict = generate_cuts(dsp_data, cut_dict=opt_dict.pop("cut_pars")) cut_idxs = get_cut_indexes(dsp_data, cut_dict) tb_data = sto.read( - f"{args.channel}/raw", raw_files, n_rows=opt_dict.pop("n_events"), idx=idxs[cut_idxs] + f"{channel}/raw", raw_files, n_rows=opt_dict.pop("n_events"), idx=idxs[cut_idxs] )[0] log.info(f"... {len(tb_data)} baselines after cuts") @@ -81,12 +86,10 @@ if args.plot_path: out_dict, plot_dict = pno.noise_optimization( - tb_data, dsp_config, db_dict.copy(), opt_dict, args.channel, display=1 + tb_data, dsp_config, db_dict.copy(), opt_dict, channel, display=1 ) else: - out_dict = pno.noise_optimization( - raw_files, dsp_config, db_dict.copy(), opt_dict, args.channel - ) + out_dict = pno.noise_optimization(raw_files, dsp_config, db_dict.copy(), opt_dict, channel) t2 = time.time() log.info(f"Optimiser finished in {(t2-t0)/60} minutes") diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index 82cec2d..b584648 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -13,10 +13,13 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) + argparser.add_argument("--plot_path", help="plot path", type=str, required=False) argparser.add_argument("--output_file", help="output file", type=str, required=True) @@ -37,6 +40,10 @@ sto = lh5.LH5Store() log = logging.getLogger(__name__) +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) config_dict = configs.on(args.timestamp, system=args.datatype) channel_dict = config_dict["snakemake_rules"]["pars_dsp_tau"]["inputs"]["processing_chain"][ @@ -66,14 +73,14 @@ tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" raise ValueError(msg) data = sto.read( - f"{args.channel}/raw", input_file, field_mask=["daqenergy", "timestamp", "t_sat_lo"] + f"{channel}/raw", input_file, field_mask=["daqenergy", "timestamp", "t_sat_lo"] )[0].view_as("pd") threshold = kwarg_dict.pop("threshold") @@ -89,7 +96,7 @@ cuts = np.where((data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering))[0] tb_data = sto.read( - f"{args.channel}/raw", + f"{channel}/raw", input_file, idx=cuts, 
n_rows=kwarg_dict.pop("n_events"), diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index a393868..c30c7ef 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -15,6 +15,7 @@ from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -103,17 +104,20 @@ def aoe_calibration( argparser.add_argument("files", help="files", nargs="*", type=str) argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) + argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--aoe_results", help="aoe_results", type=str) @@ -129,6 +133,10 @@ def aoe_calibration( logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_hit_aoecal" @@ -194,7 +202,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -213,7 +221,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") ) else: msg = "No pulser file or tcm filelist provided" @@ -231,6 +239,7 @@ def eres_func(x): sigma_func=sigma_func, **kwarg_dict, ) + obj.pdf = obj.pdf.name # need to change eres func as can't pickle lambdas try: @@ -266,6 +275,9 @@ def eres_func(x): "pars": {"operations": cal_dict}, "results": results_dict, } + +final_hit_dict = convert_dict_np_to_float(final_hit_dict) + Props.write_to(args.hit_pars, final_hit_dict) Path(args.aoe_results).parent.mkdir(parents=True, exist_ok=True) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index b310500..c94041d 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -22,6 +22,7 @@ from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from 
scipy.stats import binned_statistic +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) mpl.use("agg") @@ -452,8 +453,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) + channel = f"ch{chmap[args.channel].daq.rawid:07}" - det_status = chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["usability"] + det_status = chmap[args.channel]["analysis"]["usability"] if args.in_hit_dict: hit_dict = Props.read_from(args.in_hit_dict) @@ -466,7 +468,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): database_dic = Props.read_from(db_files) - hit_dict.update(database_dic[args.channel]["ctc_params"]) + hit_dict.update(database_dic[channel]["ctc_params"]) # get metadata dictionary configs = LegendMetadata(path=args.configs) @@ -497,7 +499,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", hit_dict, params=[*kwarg_dict["energy_params"], kwarg_dict["cut_param"], "timestamp", "trapTmax"], threshold=kwarg_dict["threshold"], @@ -515,7 +517,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -698,14 +700,14 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): if "monitoring_parameters" in kwarg_dict: monitor_dict = monitor_parameters( - files, f"{args.channel}/dsp", kwarg_dict["monitoring_parameters"] + files, f"{channel}/dsp", kwarg_dict["monitoring_parameters"] ) results_dict.update({"monitoring_parameters": monitor_dict}) # get baseline plots and save all plots to file if args.plot_path: common_dict = baseline_tracking_plots( - sorted(files), f"{args.channel}/dsp", plot_options=bl_plots + sorted(files), f"{channel}/dsp", plot_options=bl_plots ) for plot in list(common_dict): @@ -739,7 +741,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): pkl.dump(total_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) # save output dictionary - output_dict = {"pars": hit_dict, "results": {"ecal": results_dict}} + output_dict = convert_dict_np_to_float({"pars": hit_dict, "results": {"ecal": results_dict}}) Props.write_to(args.save_path, output_dict) # save calibration objects diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 579b34a..169b560 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -11,10 +11,12 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.math.distributions import gaussian +from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -128,12 +130,13 @@ def lq_calibration( argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) argparser.add_argument("--configs", help="configs", type=str, 
required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) argparser.add_argument("--hit_pars", help="hit_pars", type=str) argparser.add_argument("--lq_results", help="lq_results", type=str) @@ -148,6 +151,10 @@ def lq_calibration( logging.getLogger("h5py").setLevel(logging.INFO) logging.getLogger("matplotlib").setLevel(logging.INFO) +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_hit_lqcal" @@ -197,7 +204,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( files, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -216,7 +223,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") + tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") ) else: msg = "No pulser file or tcm filelist provided" @@ -262,19 +269,19 @@ def eres_func(x): pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) -results_dict = dict(**eres_dict, lq=out_dict) +final_hit_dict = convert_dict_np_to_float( + { + "pars": {"operations": cal_dict}, + "results": dict(**eres_dict, lq=out_dict), + } +) Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) -final_hit_dict = { - "pars": {"operations": cal_dict}, - "results": results_dict, -} Props.write_to(args.hit_pars, final_hit_dict) -Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) final_object_dict = dict( **object_dict, lq=obj, ) -Props.write_to(args.lq_results, final_object_dict) +Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) with Path(args.lq_results).open("wb") as w: pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 5311c46..320fee9 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -18,6 +18,7 @@ get_tcm_pulser_ids, ) from pygama.pargen.utils import load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) @@ -28,17 +29,26 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) + argparser.add_argument( + "--overwrite_files", + help="overwrite_files", + type=str, + required=False, + nargs="*", + ) argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + 
argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument("--tier", help="tier", type=str, default="hit") - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) argparser.add_argument("--save_path", help="save_path", type=str) args = argparser.parse_args() @@ -51,6 +61,10 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] @@ -58,19 +72,37 @@ kwarg_dict = Props.read_from(channel_dict) + if args.overwrite_files: + overwrite = Props.read_from(args.overwrite_files) + if channel in overwrite: + overwrite = overwrite[channel]["pars"]["operations"] + else: + overwrite = None + else: + overwrite = None + + if len(args.fft_files) == 1 and Path(args.fft_files[0]).suffix == ".filelist": + with Path(args.fft_files[0]).open() as f: + fft_files = f.read().splitlines() + else: + fft_files = args.fft_files + + if len(args.cal_files) == 1 and Path(args.cal_files[0]).suffix == ".filelist": + with Path(args.cal_files[0]).open() as f: + cal_files = f.read().splitlines() + else: + cal_files = args.fft_files + kwarg_dict_fft = kwarg_dict["fft_fields"] - if len(args.fft_files) > 0: + if len(fft_files) > 0: fft_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.fft_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(fft_files[0], f"{channel}/dsp/")], kwarg_dict_fft["cut_parameters"], ) fft_data = load_data( - args.fft_files, - f"{args.channel}/dsp", + fft_files, + f"{channel}/dsp", {}, [*fft_fields, "timestamp", "trapTmax"], ) @@ -123,31 +155,31 @@ hit_dict_fft = {} plot_dict_fft = {} + if overwrite is not None: + for name in kwarg_dict_fft["cut_parameters"]: + for cut_name, cut_dict in overwrite.items(): + if name in cut_name: + hit_dict_fft.update({cut_name: cut_dict}) + kwarg_dict_cal = kwarg_dict["cal_fields"] cut_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], kwarg_dict_cal["cut_parameters"], ) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] cut_fields += get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(args.cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], init_cal["cut_parameters"], ) # load data in data, threshold_mask = load_data( - args.cal_files, - f"{args.channel}/dsp", + cal_files, + f"{channel}/dsp", {}, - [*cut_fields, "timestamp", "trapTmax"], + [*cut_fields, "timestamp", "trapTmax", "t_sat_lo"], threshold=kwarg_dict_cal.get("threshold", 0), return_selection_mask=True, cal_energy_param="trapTmax", @@ -163,7 +195,7 @@ tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, 
kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -201,16 +233,19 @@ for key in info.get("parameters", None): exp = re.sub(f"(? 500: + if len(data.query("is_pulser & ~is_recovering")) < 500: data = data.query("is_pulser & ~is_recovering") else: data = data.query("~is_pulser & ~is_recovering")[mask] @@ -222,9 +257,17 @@ display=1 if args.plot_path else 0, ) + if overwrite is not None: + for name in kwarg_dict_cal["cut_parameters"]: + for cut_name, cut_dict in overwrite.items(): + if name in cut_name: + hit_dict_cal.update({cut_name: cut_dict}) + hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal} plot_dict = {**plot_dict_fft, **plot_dict_init_cal, **plot_dict_cal} + hit_dict = convert_dict_np_to_float(hit_dict) + Path(args.save_path).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.save_path, hit_dict) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index e9573e3..ca938e5 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -255,12 +255,13 @@ def eres_func(x): argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--aoe_results", help="aoe_results", nargs="*", type=str) @@ -276,6 +277,10 @@ def eres_func(x): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_pht_aoecal" @@ -350,7 +355,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -372,7 +377,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 4064b3c..104ad05 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -54,13 +54,13 @@ def run_splitter(files): argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) 
argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) @@ -77,6 +77,10 @@ def run_splitter(files): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + cal_dict = {} results_dicts = {} for ecal in args.ecal_file: @@ -167,7 +171,7 @@ def run_splitter(files): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict["threshold"], @@ -191,7 +195,7 @@ def run_splitter(files): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -213,7 +217,7 @@ def run_splitter(files): object_dict, inplots_dict, args.timestamp, - args.metadata, + chmap, args.configs, args.channel, args.datatype, diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 2ba88af..2c67745 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -13,6 +13,7 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from pygama.math.distributions import gaussian +from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal @@ -251,12 +252,13 @@ def eres_func(x): argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--lq_results", help="lq_results", nargs="*", type=str) @@ -272,6 +274,10 @@ def eres_func(x): logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ "pars_pht_lqcal" 
@@ -337,7 +343,7 @@ def eres_func(x): # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict.pop("threshold"), @@ -360,7 +366,7 @@ def eres_func(x): tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index a6eab18..a2d74e4 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -142,18 +142,14 @@ def calibrate_partition( object_dicts, plot_dicts, timestamp, - metadata_path, + chmap, configs, channel, datatype, gen_plots=True, ): - # load metadata - meta = LegendMetadata(path=metadata_path) - chmap = meta.channelmap(timestamp) - - det_status = chmap.map("daq.rawid")[int(channel[2:])]["analysis"]["usability"] + det_status = chmap[channel]["analysis"]["usability"] configs = LegendMetadata(path=configs) channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"]["pars_pht_partcal"][ @@ -418,13 +414,13 @@ def calibrate_partition( argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) @@ -441,6 +437,10 @@ def calibrate_partition( logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + cal_dict = {} results_dicts = {} for ecal in args.ecal_file: @@ -498,7 +498,7 @@ def calibrate_partition( # load data in data, threshold_mask = load_data( final_dict, - f"{args.channel}/dsp", + f"{channel}/dsp", cal_dict, params=params, threshold=kwarg_dict["threshold"], @@ -521,7 +521,7 @@ def calibrate_partition( tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -543,7 +543,7 @@ def calibrate_partition( object_dict, inplots_dict, timestamp, - args.metadata, + chmap, args.configs, args.channel, args.datatype, diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 790ee0a..495c87b 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -18,6 +18,7 @@ get_tcm_pulser_ids, ) from pygama.pargen.utils import 
load_data +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) @@ -28,6 +29,7 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument( "--tcm_filelist", help="tcm_filelist", nargs="*", type=str, required=False ) @@ -39,12 +41,13 @@ ) argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) argparser.add_argument( "--save_path", @@ -62,6 +65,10 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] @@ -85,8 +92,8 @@ if args.overwrite_files: overwrite = Props.read_from(args.overwrite_files) - if args.channel in overwrite: - overwrite = overwrite[args.channel]["pars"]["operations"] + if channel in overwrite: + overwrite = overwrite[channel]["pars"]["operations"] else: overwrite = None else: @@ -111,15 +118,15 @@ if len(fft_files) > 0: fft_fields = get_keys( [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(fft_files[0], f"{args.channel}/dsp/") + key.replace(f"{channel}/dsp/", "") + for key in ls(fft_files[0], f"{channel}/dsp/") ], kwarg_dict_fft["cut_parameters"], ) fft_data = load_data( fft_files, - f"{args.channel}/dsp", + f"{channel}/dsp", {}, [*fft_fields, "timestamp", "trapTmax", "t_sat_lo"], ) @@ -184,26 +191,20 @@ kwarg_dict_cal = kwarg_dict["cal_fields"] cut_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], kwarg_dict_cal["cut_parameters"], ) if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] cut_fields += get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(cal_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(cal_files[0], f"{channel}/dsp/")], init_cal["cut_parameters"], ) # load data in data, threshold_mask = load_data( cal_files, - f"{args.channel}/dsp", + f"{channel}/dsp", {}, [*cut_fields, "timestamp", "trapTmax", "t_sat_lo"], threshold=kwarg_dict_cal.get("threshold", 0), @@ -226,7 +227,7 @@ tcm_files = f.read().splitlines() tcm_files = sorted(np.unique(tcm_files)) ids, total_mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict["pulser_multiplicity_threshold"] + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] ) else: msg = "No pulser file or tcm filelist provided" @@ -303,6 +304,8 @@ hit_dict = {**hit_dict_fft, **hit_dict_init_cal, **hit_dict_cal} plot_dict = {**plot_dict_fft, 
**plot_dict_init_cal, **plot_dict_cal} + hit_dict = convert_dict_np_to_float(hit_dict) + for file in args.save_path: Path(file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(file, hit_dict) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 48f3d9f..4f87afb 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -17,6 +17,7 @@ generate_cut_classifiers, get_keys, ) +from util.convert_np import convert_dict_np_to_float log = logging.getLogger(__name__) @@ -28,12 +29,13 @@ argparser.add_argument("--phy_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--configs", help="config", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) argparser.add_argument( "--save_path", @@ -51,6 +53,10 @@ logging.getLogger("matplotlib").setLevel(logging.INFO) logging.getLogger("legendmeta").setLevel(logging.INFO) + meta = LegendMetadata(path=args.metadata) + chmap = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{chmap[args.channel].daq.rawid:07}" + # get metadata dictionary configs = LegendMetadata(path=args.configs) channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] @@ -88,15 +94,12 @@ kwarg_dict_fft = kwarg_dict["fft_fields"] cut_fields = get_keys( - [ - key.replace(f"{args.channel}/dsp/", "") - for key in ls(phy_files[0], f"{args.channel}/dsp/") - ], + [key.replace(f"{channel}/dsp/", "") for key in ls(phy_files[0], f"{channel}/dsp/")], kwarg_dict_fft["cut_parameters"], ) data = sto.read( - f"{args.channel}/dsp/", + f"{channel}/dsp/", phy_files, field_mask=[*cut_fields, "daqenergy", "t_sat_lo", "timestamp"], idx=np.where(bl_mask)[0], @@ -145,6 +148,8 @@ log.debug("fft cuts applied") log.debug(f"cut_dict is: {json.dumps(hit_dict, indent=2)}") + hit_dict = convert_dict_np_to_float(hit_dict) + for file in args.save_path: Path(file).name.mkdir(parents=True, exist_ok=True) Props.write_to(file, {"pars": {"operations": hit_dict}}) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index 27c1101..9e6ad42 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -10,6 +10,7 @@ argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) argparser.add_argument("--log", help="log file", type=str) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -36,6 +37,10 @@ config_dict = configs.on(args.timestamp, system=args.datatype) kwarg_dict = config_dict["snakemake_rules"]["pars_tcm_pulser"]["inputs"]["pulser_config"] +meta = LegendMetadata(path=args.metadata) +channel_dict = meta.channelmap(args.timestamp, system=args.datatype) +channel = f"ch{channel_dict[args.channel].daq.rawid}" + kwarg_dict = Props.read_from(kwarg_dict) if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": @@ -46,9 +51,7 @@ tcm_files = args.tcm_files # get pulser mask from tcm files 
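Several scripts touched here accept either explicit data files or a single *.filelist text file naming them one per line; the tcm pulser hunk here and the QC scripts earlier both expand such an argument before sorting and de-duplicating it. A small sketch of that convention (the file name in the usage comment is hypothetical):

    from pathlib import Path


    def expand_filelist(files):
        """Expand a single ".filelist" argument into the paths it lists.

        Mirrors the pattern in pars_tcm_pulser.py / pars_hit_qc.py: a
        one-element list whose entry ends in ".filelist" is read line by
        line, anything else is passed through unchanged.
        """
        if len(files) == 1 and Path(files[0]).suffix == ".filelist":
            return Path(files[0]).read_text().splitlines()
        return list(files)


    # usage sketch (hypothetical filelist):
    # tcm_files = sorted(set(expand_filelist(["all-l200-p03-r000-cal-tier_tcm.filelist"])))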
tcm_files = sorted(np.unique(tcm_files)) -ids, mask = get_tcm_pulser_ids( - tcm_files, args.channel, kwarg_dict.pop("pulser_multiplicity_threshold") -) +ids, mask = get_tcm_pulser_ids(tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold")) Path(args.pulser_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.pulser_file, {"idxs": ids.tolist(), "mask": mask.tolist()}) diff --git a/scripts/util/convert_np.py b/scripts/util/convert_np.py new file mode 100644 index 0000000..cdc363c --- /dev/null +++ b/scripts/util/convert_np.py @@ -0,0 +1,14 @@ +import numpy as np + + +def convert_dict_np_to_float(dic): + for key in dic: + if isinstance(dic[key], dict): + convert_dict_np_to_float(dic[key]) + elif isinstance(dic[key], (np.float32, np.float64)): + dic[key] = float(dic[key]) + elif isinstance(dic[key], (list, tuple)): + dic[key] = [ + float(x) if isinstance(x, (np.float32, np.float64)) else x for x in dic[key] + ] + return dic From 4f7e4058bac3836a303cb6b0ceb06cf484c30d07 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 4 Dec 2024 17:40:05 +0100 Subject: [PATCH 014/101] debugging --- rules/ann.smk | 101 ++++++++++++++--------- rules/dsp.smk | 165 +++++++++++++++++++------------------- rules/evt.smk | 142 +++++++++++++++++++++----------- rules/psp.smk | 1 + scripts/build_ann.py | 124 ---------------------------- scripts/build_dsp.py | 150 +++++++++++++++++++++------------- scripts/build_hit.py | 31 ++++--- scripts/build_tcm.py | 16 +++- scripts/merge_channels.py | 6 +- scripts/pars_dsp_tau.py | 28 +++---- scripts/pars_hit_lq.py | 2 +- 11 files changed, 380 insertions(+), 386 deletions(-) delete mode 100644 scripts/build_ann.py diff --git a/rules/ann.smk b/rules/ann.smk index 64cdd50..15558ae 100644 --- a/rules/ann.smk +++ b/rules/ann.smk @@ -4,51 +4,72 @@ to apply the ann and risetime cuts for psd. 
""" -from scripts.util.pars_loading import pars_catalog -from scripts.util.utils import par_dsp_path from scripts.util.patterns import ( - get_pattern_tier_dsp, - get_pattern_tier_psp, - get_pattern_tier_ann, get_pattern_tier, get_pattern_log, get_pattern_pars, - get_pattern_pars_overwrite, ) -for tier in ["ann", "pan"]: - rule: - input: - dsp_file=( - get_pattern_tier_dsp(setup) - if tier == "ann" - else get_pattern_tier_psp(setup) - ), - pars_file=lambda wildcards: get_svm_file(wildcards, "ann", "cuts"), - params: - timestamp="{timestamp}", - datatype="{datatype}", - output: - tier_file=get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, f"{tier}_db"), - log: - get_pattern_log(setup, f"tier_{tier}"), - group: - "tier-ann" - resources: - runtime=300, - mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, - shell: - "{swenv} python3 -B " - f"{workflow.source_path('../scripts/build_ann.py')} " - "--log {log} " - "--configs {configs} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--input {input.dsp_file} " - "--output {output.tier_file} " - "--db_file {output.db_file} " - "--pars_file {input.pars_file} " +rule build_ann: + input: + dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), + pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + output: + tier_file=get_pattern_tier(setup, "ann", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(setup, "ann_db"), + log: + get_pattern_log(setup, "tier_ann"), + group: + "tier-ann" + resources: + runtime=300, + mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/build_dsp.py')} " + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + f"--tier ann " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--input {input.dsp_file} " + "--output {output.tier_file} " + "--db_file {output.db_file} " + "--pars_file {input.pars_file} " - set_last_rule_name(workflow, f"build_{tier}") + +rule build_pan: + input: + dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False), + pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + output: + tier_file=get_pattern_tier(setup, "pan", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(setup, "pan_db"), + log: + get_pattern_log(setup, "tier_pan"), + group: + "tier-ann" + resources: + runtime=300, + mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, + shell: + "{swenv} python3 -B " + f"{workflow.source_path('../scripts/build_dsp.py')} " + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + f"--tier pan " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--input {input.dsp_file} " + "--output {output.tier_file} " + "--db_file {output.db_file} " + "--pars_file {input.pars_file} " diff --git a/rules/dsp.smk b/rules/dsp.smk index 34f7422..7ae67a7 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -363,86 +363,85 @@ rule build_pars_dsp_db: "--channelmap {meta} " -rule build_pars_dsp: - input: - in_files=lambda wildcards: get_par_chanlist( - setup, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - name="dplms", - extension="lh5", - ), - in_db=get_pattern_pars_tmp( 
- setup, - "dsp", - datatype="cal", - ), - plts=get_pattern_plts(setup, "dsp"), - objects=get_pattern_pars( - setup, - "dsp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - params: - timestamp="{timestamp}", - datatype="cal", - output: - out_file=get_pattern_pars( - setup, - "dsp", - extension="lh5", - check_in_cycle=check_in_cycle, - ), - out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), - group: - "merge-dsp" - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " - "--output {output.out_file} " - "--in_db {input.in_db} " - "--out_db {output.out_db} " - "--input {input.in_files} " - "--timestamp {params.timestamp} " - "--channelmap {meta} " - - -rule build_dsp: - input: - raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), - pars_file=ancient( - lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "dsp" - ) - ), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, - output: - tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "dsp_db"), - log: - get_pattern_log(setup, "tier_dsp"), - group: - "tier-dsp" - resources: - runtime=300, - mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_dsp.py " - "--log {log} " - f"--configs {ro(configs)} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--input {params.ro_input[raw_file]} " - "--output {output.tier_file} " - "--db_file {output.db_file} " - "--pars_file {params.ro_input[pars_file]} " +# rule build_pars_dsp: +# input: +# in_files=lambda wildcards: get_par_chanlist( +# setup, +# f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", +# "dsp", +# basedir, +# det_status, +# chan_maps, +# name="dplms", +# extension="lh5", +# ), +# in_db=get_pattern_pars_tmp( +# setup, +# "dsp", +# datatype="cal", +# ), +# plts=get_pattern_plts(setup, "dsp"), +# objects=get_pattern_pars( +# setup, +# "dsp", +# name="objects", +# extension="dir", +# check_in_cycle=check_in_cycle, +# ), +# params: +# timestamp="{timestamp}", +# datatype="cal", +# output: +# out_file=get_pattern_pars( +# setup, +# "dsp", +# extension="lh5", +# check_in_cycle=check_in_cycle, +# ), +# out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), +# group: +# "merge-dsp" +# shell: +# "{swenv} python3 -B " +# "{basedir}/../scripts/merge_channels.py " +# "--output {output.out_file} " +# "--in_db {input.in_db} " +# "--out_db {output.out_db} " +# "--input {input.in_files} " +# "--timestamp {params.timestamp} " +# "--channelmap {meta} " +# rule build_dsp: +# input: +# raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), +# pars_file=ancient( +# lambda wildcards: ParsCatalog.get_par_file( +# setup, wildcards.timestamp, "dsp" +# ) +# ), +# params: +# timestamp="{timestamp}", +# datatype="{datatype}", +# ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, +# output: +# tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), +# db_file=get_pattern_pars_tmp(setup, "dsp_db"), +# log: +# get_pattern_log(setup, "tier_dsp"), +# group: +# "tier-dsp" +# resources: +# runtime=300, +# mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, +# shell: +# "{swenv} python3 -B " +# "{basedir}/../scripts/build_dsp.py " +# "--log {log} " +# 
"--tier dsp " +# f"--configs {ro(configs)} " +# "--metadata {meta} " +# "--datatype {params.datatype} " +# "--timestamp {params.timestamp} " +# "--input {params.ro_input[raw_file]} " +# "--output {output.tier_file} " +# "--db_file {output.db_file} " +# "--pars_file {params.ro_input[pars_file]} " diff --git a/rules/evt.smk b/rules/evt.smk index 9239b96..112c92c 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -11,50 +11,91 @@ from scripts.util.patterns import ( ) -for tier in ("evt", "pet"): +rule build_evt: + input: + dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), + hit_file=get_pattern_tier(setup, "hit", check_in_cycle=False), + tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + ann_file=lambda wildcards: ( + None + if int(wildcards["period"][1:]) > 11 + else get_pattern_tier(setup, "ann", check_in_cycle=False) + ), + par_files=lambda wildcards: ParsCatalog.get_par_file( + setup, wildcards.timestamp, "hit" + ), + xtalk_matrix=lambda wildcards: get_input_par_file( + tier="evt", wildcards=wildcards, name="xtc" + ), + output: + get_pattern_tier(setup, "evt", check_in_cycle=check_in_cycle), + params: + timestamp="{timestamp}", + datatype="{datatype}", + tier="evt", + ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, + log: + get_pattern_log(setup, f"tier_evt"), + group: + "tier-evt" + resources: + runtime=300, + mem_swap=50, + run: + shell_string = ( + f"{swenv} python3 -B " + f"{basedir}/../scripts/build_evt.py " + f"--configs {ro(configs)} " + f"--metadata {ro(meta)} " + "--log {log} " + "--tier {params.tier} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--xtc_file {params.ro_input[xtalk_matrix]} " + "--par_files {params.ro_input[par_files]} " + "--hit_file {params.ro_input[hit_file]} " + "--tcm_file {params.ro_input[tcm_file]} " + "--dsp_file {params.ro_input[dsp_file]} " + "--output {output} " + ) + if input.ann_file is not None: + shell_string += "--ann_file {params.ro_input[ann_file]} " - rule: - input: - dsp_file=( - get_pattern_tier(setup, "dsp", check_in_cycle=False) - if tier == "evt" - else get_pattern_tier(setup, "psp", check_in_cycle=False) - ), - hit_file=( - get_pattern_tier(setup, "hit", check_in_cycle=False) - if tier == "evt" - else get_pattern_tier(setup, "pht", check_in_cycle=False) - ), - tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), - xtalk_matrix=lambda wildcards: get_input_par_file( - tier=tier, wildcards=wildcards, name="xtc" - ), - ann_file=branch( - lambda wildcards: tier if wildcards["period"][1:] <= 11 else "none", - cases={ - "evt": get_pattern_tier(setup, "ann", check_in_cycle=False), - "pet": get_pattern_tier(setup, "pan", check_in_cycle=False), - "none": None, - }, - ), - par_files=lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "pht" - ), - output: - get_pattern_tier(setup, tier, check_in_cycle=check_in_cycle), - params: - timestamp="{timestamp}", - datatype="{datatype}", - tier=tier, - ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, - log: - get_pattern_log(setup, f"tier_{tier}"), - group: - "tier-evt" - resources: - runtime=300, - mem_swap=50, - shell: + shell(shell_string) + + +rule build_pet: + input: + dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False), + hit_file=get_pattern_tier(setup, "pht", check_in_cycle=False), + tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + ann_file=lambda wildcards: ( + None + if int(wildcards["period"][1:]) > 11 + else get_pattern_tier(setup, "pan", 
check_in_cycle=False) + ), + par_files=lambda wildcards: ParsCatalog.get_par_file( + setup, wildcards.timestamp, "pht" + ), + xtalk_matrix=lambda wildcards: get_input_par_file( + tier="pet", wildcards=wildcards, name="xtc" + ), + output: + get_pattern_tier(setup, "pet", check_in_cycle=check_in_cycle), + params: + timestamp="{timestamp}", + datatype="{datatype}", + tier="pet", + ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, + log: + get_pattern_log(setup, f"tier_pet"), + group: + "tier-evt" + resources: + runtime=300, + mem_swap=50, + run: + shell_string = ( f"{swenv} python3 -B " f"{basedir}/../scripts/build_evt.py " f"--configs {ro(configs)} " @@ -68,10 +109,15 @@ for tier in ("evt", "pet"): "--hit_file {params.ro_input[hit_file]} " "--tcm_file {params.ro_input[tcm_file]} " "--dsp_file {params.ro_input[dsp_file]} " - "--ann_file {params.ro_input[ann_file]} " "--output {output} " + ) + if input.ann_file is not None: + shell_string += "--ann_file {params.ro_input[ann_file]} " + + shell(shell_string) + - set_last_rule_name(workflow, f"build_{tier}") +for evt_tier in ("evt", "pet"): rule: wildcard_constraints: @@ -87,14 +133,14 @@ for tier in ("evt", "pet"): ) ), output: - get_pattern_tier(setup, f"{tier}_concat", check_in_cycle=check_in_cycle), + get_pattern_tier(setup, f"{evt_tier}_concat", check_in_cycle=check_in_cycle), params: timestamp="all", datatype="{datatype}", lh5concat_exe=setup["paths"]["install"] + "/bin/lh5concat", ro_input=lambda _, input: utils.as_ro(setup, input), log: - get_pattern_log_concat(setup, f"tier_{tier}_concat"), + get_pattern_log_concat(setup, f"tier_{evt_tier}_concat"), group: "tier-evt" shell: @@ -102,4 +148,4 @@ for tier in ("evt", "pet"): "--output {output} " "-- {params.ro_input} &> {log}" - set_last_rule_name(workflow, f"concat_{tier}") + set_last_rule_name(workflow, f"concat_{evt_tier}") diff --git a/rules/psp.smk b/rules/psp.smk index 260be19..9fc0861 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -374,6 +374,7 @@ rule build_psp: "{swenv} python3 -B " "{basedir}/../scripts/build_dsp.py " "--log {log} " + "--tier psp " f"--configs {ro(configs)} " "--metadata {meta} " "--datatype {params.datatype} " diff --git a/scripts/build_ann.py b/scripts/build_ann.py deleted file mode 100644 index 224877a..0000000 --- a/scripts/build_ann.py +++ /dev/null @@ -1,124 +0,0 @@ -import argparse -import json -import logging -import os -import pathlib -import re -import time -import warnings - -os.environ["LGDO_CACHE"] = "false" -os.environ["LGDO_BOUNDSCHECK"] = "false" -os.environ["DSPEED_CACHE"] = "false" -os.environ["DSPEED_BOUNDSCHECK"] = "false" - -import lgdo.lh5 as lh5 -import numpy as np -from dspeed import build_dsp -from legendmeta import LegendMetadata -from legendmeta.catalog import Props - - -def replace_list_with_array(dic): - for key, value in dic.items(): - if isinstance(value, dict): - dic[key] = replace_list_with_array(value) - elif isinstance(value, list): - dic[key] = np.array(value, dtype="float32") - else: - pass - return dic - - -warnings.filterwarnings(action="ignore", category=RuntimeWarning) - -argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs path", type=str, required=True) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) -argparser.add_argument("--log", help="log file", type=str) 
-argparser.add_argument("--input", help="input file", type=str) -argparser.add_argument("--output", help="output file", type=str) -argparser.add_argument("--db_file", help="db file", type=str) -args = argparser.parse_args() - -pathlib.Path(os.path.dirname(args.log)).mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -log = logging.getLogger(__name__) - -configs = LegendMetadata(path=args.configs) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_ann"][ - "inputs" -]["processing_chain"] - -channel_dict = {chan: Props.read_from(file) for chan, file in channel_dict.items()} -db_files = [ - par_file - for par_file in args.pars_file - if os.path.splitext(par_file)[1] == ".json" or os.path.splitext(par_file)[1] == ".yml" -] - -database_dic = Props.read_from(db_files, subst_pathvar=True) -database_dic = replace_list_with_array(database_dic) - -pathlib.Path(os.path.dirname(args.output)).mkdir(parents=True, exist_ok=True) - -rng = np.random.default_rng() -rand_num = f"{rng.integers(0,99999):05d}" -temp_output = f"{args.output}.{rand_num}" - -start = time.time() - -build_dsp( - args.input, - temp_output, - {}, - database=database_dic, - chan_config=channel_dict, - write_mode="r", - buffer_len=3200 if args.datatype == "cal" else 3200, - block_width=16, -) - -log.info(f"build_ann finished in {time.time()-start}") - -os.rename(temp_output, args.output) - -if "ann" in args.output: - key = os.path.basename(args.output).replace("-tier_ann.lh5", "") -else: - key = os.path.basename(args.output).replace("-tier_pan.lh5", "") - -raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] - -raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] - -outputs = {} -channels = [] -for channel, chan_dict in channel_dict.items(): - output = chan_dict["outputs"] - in_dict = False - for entry in outputs: - if outputs[entry]["fields"] == output: - outputs[entry]["channels"].append(channel.split("/")[0]) - in_dict = True - if in_dict is False: - outputs[f"group{len(list(outputs))+1}"] = { - "channels": [channel.split("/")[0]], - "fields": output, - } - channels.append(channel.split("/")[0]) - -full_dict = { - "valid_fields": { - "ann": outputs, - }, - "valid_keys": {key: {"valid_channels": {"ann": channels}}}, -} -pathlib.Path(os.path.dirname(args.db_file)).mkdir(parents=True, exist_ok=True) -with open(args.db_file, "w") as w: - json.dump(full_dict, w, indent=4) diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 902ac4b..c505058 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -1,9 +1,10 @@ import argparse import logging -import pathlib +import logging.config import re import time import warnings +from pathlib import Path import numpy as np from dspeed import build_dsp @@ -32,6 +33,7 @@ def replace_list_with_array(dic): argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--tier", help="Tier", type=str, required=True) argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) argparser.add_argument("--input", help="input file", type=str) @@ -40,35 +42,49 @@ def replace_list_with_array(dic): 
argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -pathlib.Path(args.log).parent.mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) -log = logging.getLogger(__name__) +configs = TextDB(args.configs, lazy=True) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] +if args.tier in ["dsp", "psp"]: + config_dict = config_dict["tier_dsp"] +elif args.tier in ["ann", "pan"]: + config_dict = config_dict["tier_ann"] +else: + msg = f"Tier {args.tier} not supported" + raise ValueError(msg) + +channel_dict = config_dict["inputs"]["processing_chain"] +settings_dict = config_dict["options"].get("settings", {}) +if isinstance(settings_dict, str): + settings_dict = Props.read_from(settings_dict) +log_config = config_dict["options"]["logging"] + +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") meta = LegendMetadata(path=args.metadata) chan_map = meta.channelmap(args.timestamp, system=args.datatype) - -configs = TextDB(args.configs, lazy=True) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_dsp"][ - "inputs" -]["processing_chain"] - -channel_dict = { - f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) - for chan, file in channel_dict.items() -} +if args.tier in ["ann", "pan"]: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/dsp": Props.read_from(file) + for chan, file in channel_dict.items() + } +else: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) + for chan, file in channel_dict.items() + } db_files = [ - par_file for par_file in args.pars_file if pathlib.Path(par_file).suffix in (".json", ".yaml") + par_file for par_file in args.pars_file if Path(par_file).suffix in (".json", ".yaml", ".yml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) database_dic = replace_list_with_array(database_dic) -pathlib.Path(args.output).parent.mkdir(parents=True, exist_ok=True) +Path(args.output).parent.mkdir(parents=True, exist_ok=True) rng = np.random.default_rng() rand_num = f"{rng.integers(0, 99999):05d}" @@ -83,42 +99,66 @@ def replace_list_with_array(dic): database=database_dic, chan_config=channel_dict, write_mode="r", - buffer_len=3200 if args.datatype == "cal" else 3200, - block_width=16, + buffer_len=settings_dict.get("buffer_len", 1000), + block_width=settings_dict.get("block_width", 16), ) log.info(f"build_dsp finished in {time.time()-start}") - -pathlib.Path(temp_output).rename(args.output) - -key = pathlib.Path(args.output).name.replace("-tier_dsp.lh5", "") - -raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] - -raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] - -outputs = {} -channels = [] -for channel, chan_dict in channel_dict.items(): - output = chan_dict["outputs"] - in_dict = False - for entry in outputs: - if outputs[entry]["fields"] == output: - outputs[entry]["channels"].append(channel.split("/")[0]) - in_dict = True - if in_dict is False: - outputs[f"group{len(list(outputs))+1}"] = { - 
"channels": [channel.split("/")[0]], - "fields": output, - } - channels.append(channel.split("/")[0]) - -full_dict = { - "valid_fields": { - "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, - "dsp": outputs, - }, - "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, -} -pathlib.Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) +Path(temp_output).rename(args.output) + +key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") + +if args.tier in ["dsp", "psp"]: + + raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] + raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] + + outputs = {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, + "dsp": outputs, + }, + "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, + } +else: + outputs = {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "ann": outputs, + }, + "valid_keys": {key: {"valid_channels": {"ann": channels}}}, + } + +Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.db_file, full_dict) diff --git a/scripts/build_hit.py b/scripts/build_hit.py index 8e2da80..3aba4aa 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -3,7 +3,7 @@ import time from pathlib import Path -from legendmeta import TextDB +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 from pygama.hit.build_hit import build_hit @@ -13,12 +13,13 @@ argparser.add_argument("--pars_file", help="hit pars file", nargs="*") argparser.add_argument("--configs", help="configs", type=str, required=True) +argparser.add_argument("--metadata", help="metadata", type=str, required=True) +argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--tier", help="Tier", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--output", help="output file", type=str) argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() @@ -41,21 +42,27 @@ msg = "unknown tier" raise ValueError(msg) -pars_dict = Props.read_from(args.pars_file) +meta = LegendMetadata(path=args.metadata) +chan_map = meta.channelmap(args.timestamp, system=args.datatype) +pars_dict = Props.read_from(args.pars_file) pars_dict = {chan: chan_dict["pars"] for chan, chan_dict in pars_dict.items()} 
hit_dict = {} channels_present = lh5.ls(args.input) for channel in pars_dict: chan_pars = pars_dict[channel].copy() - if channel in channel_dict: - cfg_dict = Props.read_from(channel_dict[channel]) - Props.add_to(cfg_dict, chan_pars) - chan_pars = cfg_dict - - if channel in channels_present: - hit_dict[f"{channel}/dsp"] = chan_pars + try: + detector = chan_map.map("daq.rawid")[int(channel[2:])].name + if detector in channel_dict: + cfg_dict = Props.read_from(channel_dict[detector]) + Props.add_to(cfg_dict, chan_pars) + chan_pars = cfg_dict + + if channel in channels_present: + hit_dict[f"{channel}/dsp"] = chan_pars + except KeyError: + pass t_start = time.time() Path(args.output).parent.mkdir(parents=True, exist_ok=True) @@ -79,7 +86,7 @@ } hit_channels.append(channel) -key = Path(args.output).replace(f"-tier_{args.tier}.lh5", "") +key = args.output.replace(f"-tier_{args.tier}.lh5", "") full_dict = { "valid_fields": {args.tier: hit_outputs}, diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index 2ceb3ab..faa39d6 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -1,5 +1,6 @@ import argparse import logging +import logging.config from pathlib import Path import lgdo.lh5 as lh5 @@ -18,13 +19,20 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["tier_tcm"] +log_config = config_dict["options"]["logging"] + +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") Path(args.output).parent.mkdir(parents=True, exist_ok=True) -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -channel_dict = configs["snakemake_rules"]["tier_tcm"]["inputs"] -settings = Props.read_from(channel_dict["config"]) + +settings = Props.read_from(config_dict["inputs"]["config"]) rng = np.random.default_rng() temp_output = f"{args.output}.{rng.integers(0, 99999):05d}" diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index 5fb6d68..bed04d2 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -76,7 +76,7 @@ def replace_path(d, old_path, new_path): Path(args.output).parent.mkdir(parents=True, exist_ok=True) -if file_extension == ".json" or file_extension == ".yaml" or file_extension == ".yml": +if file_extension in (".json", ".yaml", ".yml"): out_dict = {} for channel in channel_files: if Path(channel).suffix == file_extension: @@ -92,9 +92,7 @@ def replace_path(d, old_path, new_path): msg = "Output file extension does not match input file extension" raise RuntimeError(msg) - Props.write_to(temp_output, out_dict, "json") - - Path(temp_output).rename(out_file) + Props.write_to(out_file, out_dict) elif file_extension == ".pkl": out_dict = {} diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index b584648..b8d9a71 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -1,5 +1,6 @@ import argparse import logging +import logging.config import pickle as pkl from pathlib import Path @@ -29,27 +30,24 @@ argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str, required=False) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") 
-logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) - sto = lh5.LH5Store() -log = logging.getLogger(__name__) + +configs = LegendMetadata(path=args.configs) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["pars_dsp_tau"] +log_config = config_dict["options"]["logging"] + +Path(args.log).parent.mkdir(parents=True, exist_ok=True) +log_config = Props.read_from(log_config) +log_config["handlers"]["file"]["filename"] = args.log +logging.config.dictConfig(log_config) +log = logging.getLogger("test") meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -configs = LegendMetadata(path=args.configs) -config_dict = configs.on(args.timestamp, system=args.datatype) -channel_dict = config_dict["snakemake_rules"]["pars_dsp_tau"]["inputs"]["processing_chain"][ - args.channel -] -kwarg_dict = config_dict["snakemake_rules"]["pars_dsp_tau"]["inputs"]["tau_config"][args.channel] +channel_dict = config_dict["inputs"]["processing_chain"][args.channel] +kwarg_dict = config_dict["inputs"]["tau_config"][args.channel] kwarg_dict = Props.read_from(kwarg_dict) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 169b560..8625ed3 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -27,7 +27,7 @@ def get_results_dict(lq_class): "cal_energy_param": lq_class.cal_energy_param, "DEP_means": lq_class.timecorr_df.to_dict("index"), "rt_correction": lq_class.dt_fit_pars, - "cut_fit_pars": lq_class.cut_fit_pars, + "cut_fit_pars": lq_class.cut_fit_pars.to_dict(), "cut_value": lq_class.cut_val, "sfs": lq_class.low_side_sf.to_dict("index"), } From a2f2d7eb7d850f7ae90c2c75835521fd96845a06 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Dec 2024 16:44:30 +0000 Subject: [PATCH 015/101] style: pre-commit fixes --- rules/filelist_gen.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index d0356a8..c90c570 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -220,7 +220,7 @@ def get_filelist( wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): file_selection = wildcards.label.split("-", 1)[0] - keypart = f'-{wildcards.label.split("-", 1)[1]}' # remove the file selection from the keypart + keypart = f'-{wildcards.label.split("-",1)[1]}' # remove the file selection from the keypart analysis_runs, ignore_keys = get_analysis_runs( ignore_keys_file, analysis_runs_file, file_selection ) From ce2ad8526e7aad37ec8ff5e38e982d45daa3f120 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 5 Dec 2024 14:46:29 +0100 Subject: [PATCH 016/101] add isotopes where lines are from --- scripts/pars_pht_partcal.py | 56 ++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index a2d74e4..7b6a4ed 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -166,34 +166,34 @@ def calibrate_partition( # calibrate pk_pars = [ - # (238.632, (10, 10), pgf.gauss_on_step), #double line - # (241.0, (10, 10), pgf.gauss_on_step), 
#double line - (277.371, (10, 7), pgf.gauss_on_linear), - (288.2, (7, 10), pgf.gauss_on_linear), - (300.1, (10, 10), pgf.gauss_on_linear), - (453.0, (10, 10), pgf.gauss_on_linear), - # (511, (20, 20), pgf.gauss_on_step), double line - (549.8, (10, 10), pgf.gauss_on_linear), - (583.187, (20, 20), pgf.hpge_peak), - (727.330, (20, 20), pgf.hpge_peak), - (763.13, (20, 10), pgf.gauss_on_linear), - (785.37, (10, 20), pgf.gauss_on_linear), - (860.557, (20, 20), pgf.hpge_peak), - (893.408, (20, 20), pgf.gauss_on_linear), - (927.6, (20, 20), pgf.gauss_on_linear), - (952.120, (20, 20), pgf.gauss_on_linear), - (982.7, (20, 20), pgf.gauss_on_linear), - (1078.62, (20, 7), pgf.gauss_on_linear), - (1093.9, (7, 20), pgf.gauss_on_linear), - (1512.7, (20, 20), pgf.gauss_on_linear), - (1592.511, (20, 20), pgf.hpge_peak), - (1620.50, (20, 20), pgf.hpge_peak), - (1679.7, (20, 20), pgf.gauss_on_linear), - (1806.0, (20, 20), pgf.gauss_on_linear), - (2103.511, (20, 20), pgf.hpge_peak), - (2614.511, (40, 20), pgf.hpge_peak), - (3125.511, (20, 20), pgf.gauss_on_linear), - (3197.7, (20, 20), pgf.gauss_on_linear), + # (238.632, (10, 10), pgf.gauss_on_step), #double line, Pb-212 + # (240.986, (10, 10), pgf.gauss_on_step), #double line, Ra-224 + (277.371, (10, 7), pgf.gauss_on_linear), # Tl-208 + (288.2, (7, 10), pgf.gauss_on_linear), # Bi-212 + (300.087, (10, 10), pgf.gauss_on_linear), # Pb-212 + (452.98, (10, 10), pgf.gauss_on_linear), # Bi-212 + # (511, (20, 20), pgf.gauss_on_step), double line, #e+e- + (549.73, (10, 10), pgf.gauss_on_linear), # Rn-220 + (583.187, (20, 20), pgf.hpge_peak), # Tl-208 + (727.330, (20, 20), pgf.hpge_peak), # Bi-212 + (763.13, (20, 10), pgf.gauss_on_linear), # Tl-208 + (785.37, (10, 20), pgf.gauss_on_linear), # Bi-212 + (860.557, (20, 20), pgf.hpge_peak), # Tl-208 + (893.408, (20, 20), pgf.gauss_on_linear), # Bi-212 + (927.6, (20, 20), pgf.gauss_on_linear), # Tl-208 + (952.120, (20, 20), pgf.gauss_on_linear), # Bi-212 + (982.7, (20, 20), pgf.gauss_on_linear), # Tl-208 + (1078.62, (20, 7), pgf.gauss_on_linear), # Bi-212 + (1093.9, (7, 20), pgf.gauss_on_linear), # Tl-208 + (1512.7, (20, 20), pgf.gauss_on_linear), # Bi-212 + (1592.511, (20, 20), pgf.hpge_peak), # Tl-208 DEP + (1620.50, (20, 20), pgf.hpge_peak), # Bi-212 + (1679.7, (20, 20), pgf.gauss_on_linear), # Bi-212 + (1806.0, (20, 20), pgf.gauss_on_linear), # Bi-212 + (2103.511, (20, 20), pgf.hpge_peak), # Tl-208 SEP + (2614.511, (40, 20), pgf.hpge_peak), # Tl-208 + (3125.511, (20, 20), pgf.gauss_on_linear), # Summation + (3197.7, (20, 20), pgf.gauss_on_linear), # Summation (3475.1, (20, 20), pgf.gauss_on_linear), ] From 2deac35ff8c30a90eb13835d7f8e0e447ef803e4 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 5 Dec 2024 21:03:13 +0100 Subject: [PATCH 017/101] choose ctc based on no_ctc energy instead --- scripts/pars_hit_ecal.py | 2 +- scripts/pars_pht_partcal.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index c94041d..43ba644 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -636,7 +636,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): hit_dict.update( { cal_energy_param.replace("_ctc", ""): { - "expression": f"where({cal_energy_param}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", + "expression": f"where({cal_energy_param.replace('ctc','noctc')}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, 
{cal_energy_param.replace('ctc','noctc')})", "parameters": {}, } } diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 7b6a4ed..a454d76 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -308,7 +308,7 @@ def calibrate_partition( cal_dicts, { cal_energy_param.replace("_ctc", ""): { - "expression": f"where({cal_energy_param}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", + "expression": f"where({cal_energy_param.replace('ctc', 'noctc')}>{kwarg_dict.get('dt_theshold_kev',100)}, {cal_energy_param}, {cal_energy_param.replace('ctc','noctc')})", "parameters": {}, } }, From 97a0f8e9f9948c307121d994c3e29d49f46137c3 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 26 Dec 2024 18:47:31 +0100 Subject: [PATCH 018/101] Fix a bunch of docs things --- .gitignore | 2 ++ .readthedocs.yaml | 19 +++++++++++ docs/source/conf.py | 68 +++++++++++++++++++++++++++++++++++++ docs/source/developer.rst | 28 ++++++++------- docs/source/index.rst | 21 +++++++----- docs/source/user_manual.rst | 55 +++++++++++++++++------------- 6 files changed, 149 insertions(+), 44 deletions(-) create mode 100644 .readthedocs.yaml create mode 100644 docs/source/conf.py diff --git a/.gitignore b/.gitignore index b9905f2..90d9198 100644 --- a/.gitignore +++ b/.gitignore @@ -113,3 +113,5 @@ venv.bak/ # mypy .mypy_cache/ + +docs/source/api diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..afc42e1 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,19 @@ +version: 2 + +sphinx: + configuration: docs/source/conf.py + +build: + os: "ubuntu-22.04" + tools: + python: "3.12" + commands: + # FIXME: dependencies should not be explicitly listed here! + - pip install snakemake pygama pylegendmeta sphinx-inline-tabs sphinx-copybutton furo myst-parser + - rm -rf docs/source/api + - sphinx-apidoc + --private + --module-first + --force + --output-dir docs/source/api + scripts diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..013e65b --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,68 @@ +# Configuration file for the Sphinx documentation builder. 
+from __future__ import annotations + +import sys +from pathlib import Path + +sys.path.insert(0, Path(__file__).parents[2].resolve().as_posix() / "scripts") + +project = "legend-dataflow" +copyright = "2024, the LEGEND Collaboration" + +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.mathjax", + "sphinx.ext.napoleon", + "sphinx.ext.intersphinx", + "sphinx_copybutton", + "sphinx_inline_tabs", + "myst_parser", + "IPython.sphinxext.ipython_console_highlighting", +] + +source_suffix = { + ".rst": "restructuredtext", + ".md": "markdown", +} +master_doc = "index" + +# Furo theme +html_theme = "furo" +html_theme_options = { + "source_repository": "https://github.com/legend-exp/legend-dataflow", + "source_branch": "main", + "source_directory": "docs/source", +} +html_title = f"{project}" + +# sphinx-napoleon +# enforce consistent usage of NumPy-style docstrings +napoleon_numpy_docstring = True +napoleon_google_docstring = False +napoleon_use_ivar = True +napoleon_use_rtype = False + +# intersphinx +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "numpy": ("https://numpy.org/doc/stable", None), + "awkward": ("https://awkward-array.org/doc/stable", None), + "numba": ("https://numba.readthedocs.io/en/stable", None), + "pandas": ("https://pandas.pydata.org/docs", None), + "h5py": ("https://docs.h5py.org/en/stable", None), + "pint": ("https://pint.readthedocs.io/en/stable", None), + "hist": ("https://hist.readthedocs.io/en/latest", None), + "dspeed": ("https://dspeed.readthedocs.io/en/stable", None), + "daq2lh5": ("https://legend-daq2lh5.readthedocs.io/en/stable", None), + "lgdo": ("https://legend-pydataobj.readthedocs.io/en/stable", None), + "dbetto": ("https://dbetto.readthedocs.io/en/stable", None), + "pylegendmeta": ("https://pylegendmeta.readthedocs.io/en/stable", None), +} # add new intersphinx mappings here + +# sphinx-autodoc +autodoc_default_options = {"ignore-module-all": True} +# Include __init__() docstring in class docstring +autoclass_content = "both" +autodoc_typehints = "description" +autodoc_typehints_description_target = "documented_params" +autodoc_typehints_format = "short" diff --git a/docs/source/developer.rst b/docs/source/developer.rst index b6d7560..fa8db0e 100644 --- a/docs/source/developer.rst +++ b/docs/source/developer.rst @@ -1,15 +1,19 @@ Developers Guide -=============== +================ -Snakemake is configured around a series of rules which specify how to generate a file/files from a set of input files. -These rules are defined in the ``Snakefile`` and in the files in the ``rules`` directory. -In general the structure is that a series of rules are defined to run on some calibration data generation -a final ``par_{tier}.yaml`` file at the end which can be used by the ``tier``` rule to generate all the files in the tier. -For most rules there are 2 versions the basic version and the partition version where the first uses a single run -while the latter will group many runs together. -This grouping is defined in the ``cal_grouping.yaml`` file in the `legend-datasets `_ repository. +Snakemake is configured around a series of rules which specify how to generate +a file/files from a set of input files. These rules are defined in the +``Snakefile`` and in the files in the ``rules`` directory. In general the +structure is that a series of rules are defined to run on some calibration data +generation a final ``par_{tier}.yaml`` file at the end which can be used by the +``tier``` rule to generate all the files in the tier. 
For most rules there are +2 versions the basic version and the partition version where the first uses a +single run while the latter will group many runs together. This grouping is +defined in the ``cal_grouping.yaml`` file in the `legend-datasets +`_ repository. -Each rule has specified its inputs and outputs along with how to generate which can be -a shell command or a call to a python function. These scripts are stored in the ``scripts``` directory. -Additional parameters can also be defined. -Full details can be found at `snakemake https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html)`_. +Each rule has specified its inputs and outputs along with how to generate which +can be a shell command or a call to a python function. These scripts are stored +in the ``scripts``` directory. Additional parameters can also be defined. +Full details can be found at `snakemake +`_. diff --git a/docs/source/index.rst b/docs/source/index.rst index 8534e71..fdf8cad 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,16 +1,18 @@ -Welcome to legend-dataflow's documentation! -================================== +legend-dataflow +=============== -*legend-dataflow* is a Python package based on Snakemake ``_ -for running the data production of LEGEND. -It is designed to calibrate and optimise hundreds of channels in parallel before -bringing them all together to process the data. It takes as an input the metadata -at `legend metadata `_. +*legend-dataflow* is a Python package based on Snakemake +``_ for running the data +production of LEGEND. It is designed to calibrate and optimise hundreds of +channels in parallel before bringing them all together to process the data. It +takes as an input the metadata at `legend metadata +`_. Getting started --------------- -It is recommended to install and use the package through the `legend-prodenv `_. +It is recommended to install and use the package through the `legend-prodenv +`_. Next steps ---------- @@ -23,7 +25,7 @@ Next steps .. toctree:: :maxdepth: 1 - tutorials + user_manual .. toctree:: :maxdepth: 1 @@ -38,4 +40,5 @@ Next steps :maxdepth: 1 :caption: Development + developer Source Code diff --git a/docs/source/user_manual.rst b/docs/source/user_manual.rst index fb3e81b..90f4557 100644 --- a/docs/source/user_manual.rst +++ b/docs/source/user_manual.rst @@ -1,3 +1,6 @@ +User Manual +----------- + Configuration ============= @@ -13,33 +16,38 @@ the default path to the config file is ``./config.json``. Profiles ======== -A number of profiles are also included in the ``profiles`` directory. If none are specified, -the default profile is used. The profile can be specified by using the ``--profile`` option -when running Snakemake. These control how many jobs are run simultaneously, based on how many cores -are specified and the memory constraints of the system. A full list of all the options -that can be specified to snakemake can be found at `snakemake `_. +A number of profiles are also included in the ``profiles`` directory. If none +are specified, the default profile is used. The profile can be specified by +using the ``--profile`` option when running Snakemake. These control how many +jobs are run simultaneously, based on how many cores are specified and the +memory constraints of the system. A full list of all the options that can be +specified to snakemake can be found at `snakemake +`_. 
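+
+For example, a profile can be selected explicitly when invoking snakemake
+(illustrative command, run from the production cycle directory; the target
+syntax is described in the sections below):
+
+```shell
+$ snakemake --profile profiles/legend-data all-l200-p03-r000-cal-dsp.gen
+```
+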
Running the Dataflow ==================== -To run the dataflow at the most basic level all that is necassary is to tell snakemake the target file -generation. In a simple case this may just be a single file e.g. +To run the dataflow at the most basic level all that is necassary is to tell +snakemake the target file generation. In a simple case this may just be a +single file e.g. ```shell $ snakemake /data2/public/prodenv/prod-blind/ref-v1.0.0/generated/tier/dsp/p03/r000/l200-p03-r000-cal-20230401T000000Z-tier_dsp.lh5 ``` -This would generate the file and all the files that are required to generate it. -In general though we want to generate a large number of files, and we can do this using the ``gen`` target. +This would generate the file and all the files that are required to generate +it. In general though we want to generate a large number of files, and we can +do this using the ``gen`` target. Main output generation ====================== -Usually, the main output will be determined by a file-list. -The special output target ``{label}-{tier}.gen`` is used to -generate all files that follow the label up to the specified tier. -The label is composed of the following parts: -- the filelist designator: in most cases this will be ``all``, but other options are specified in the ``runlists.yaml`` file -in the `legend-datasets `_ repository. +Usually, the main output will be determined by a file-list. The special output +target ``{label}-{tier}.gen`` is used to generate all files that follow the +label up to the specified tier. The label is composed of the following parts: + +- the filelist designator: in most cases this will be ``all``, but other + options are specified in the ``runlists.yaml`` file in the `legend-datasets + `_ repository. - experiment: the experiment name i.e. l200 - period: the period of the data e.g. p03 - run: the run number e.g. r000 @@ -47,19 +55,20 @@ in the `legend-datasets `_ reposi - timestamp: the timestamp of the data e.g. 20230401T000000Z Example: + ```shell $ snakemake all-l200-p03-r001-cal-20230401T000000Z-dsp.gen ``` -You can specify as many or as few of these as they like e.g. ``all-l200-p03-dsp.gen`` -If you want to specify a lower part of the label but leave a higher part free, -you can use the ``*``` character e.g. ``all-l200-p03-*-cal-dsp.gen`` . -Additionally if you want to specify multiple options for a part of the label you can use the ``_`` character between -e.g. ``all-l200-p03-r000_r001-dsp.gen``. +You can specify as many or as few of these as they like e.g. +``all-l200-p03-dsp.gen`` If you want to specify a lower part of the label but +leave a higher part free, you can use the ``*``` character e.g. +``all-l200-p03-*-cal-dsp.gen`` . Additionally if you want to specify multiple +options for a part of the label you can use the ``_`` character between e.g. +``all-l200-p03-r000_r001-dsp.gen``. -After the files -are created, the empty file ``{label}-{tier}.gen```` will be created to -mark the successful data production. +After the files are created, the empty file ``{label}-{tier}.gen```` will be +created to mark the successful data production. 
Monitoring From 4c6dffccf9c86362ff7f5069a2248eaa6d5e2311 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 26 Dec 2024 21:23:17 +0100 Subject: [PATCH 019/101] update blinding cal to new hpgecal --- scripts/blinding_calibration.py | 44 ++++++++++++++------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/scripts/blinding_calibration.py b/scripts/blinding_calibration.py index 62207e9..072e756 100644 --- a/scripts/blinding_calibration.py +++ b/scripts/blinding_calibration.py @@ -15,21 +15,25 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo import lh5 -from pygama.math.histogram import better_int_binning, get_hist -from pygama.pargen.energy_cal import hpge_find_E_peaks +from pygama.pargen.energy_cal import HPGeCalibration mpl.use("agg") argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="files", nargs="*", type=str) + argparser.add_argument("--blind_curve", help="blind_curve", type=str) argparser.add_argument("--plot_file", help="out plot path", type=str) + argparser.add_argument("--meta", help="meta", type=str) +argparser.add_argument("--configs", help="configs", type=str) +argparser.add_argument("--log", help="log", type=str) + argparser.add_argument("--timestamp", help="timestamp", type=str) argparser.add_argument("--datatype", help="datatype", type=str) argparser.add_argument("--channel", help="channel", type=str) -argparser.add_argument("--configs", help="configs", type=str) -argparser.add_argument("--log", help="log", type=str) + +argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") @@ -56,15 +60,19 @@ dEuc = 1 / guess_keV # daqenergy is an int so use integer binning (dx used to be bugged as output so switched to nbins) -Euc_min, Euc_max, nbins = better_int_binning( - x_lo=Euc_min, x_hi=Euc_max, n_bins=(Euc_max - Euc_min) / dEuc + + +hpge_cal = HPGeCalibration( + "daqenergy", + peaks_keV, + guess_keV, + 0, + uncal_is_int=True, + debug_mode=args.debug, ) -hist, bins, var = get_hist(E_uncal, range=(Euc_min, Euc_max), bins=nbins) # Run the rough peak search -detected_peaks_locs, detected_peaks_keV, roughpars = hpge_find_E_peaks( - hist, bins, var, peaks_keV, n_sigma=5, deg=0 -) +detected_peaks_locs, detected_peaks_keV, roughpars = hpge_cal.hpge_find_E_peaks(E_uncal) log.info(f"{len(detected_peaks_locs)} peaks found:") log.info("\t Energy | Position ") @@ -98,20 +106,4 @@ pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) plt.close() -# else: -# out_dict = { -# "pars": { -# "operations": { -# "daqenergy_cal": { -# "expression": "daqenergy*a", -# "parameters": {"a": np.nan}, -# } -# } -# } -# } -# fig = plt.figure(figsize=(8, 10)) -# plt.suptitle(f"{args.channel}-blind_off") -# with open(args.plot_file, "wb") as w: -# pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) -# plt.close() Props.write_to_file(args.blind_curve, out_dict) From 08e20e7077016ab6265b6b1aeb99397ad99e6942 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 27 Dec 2024 18:36:28 +0100 Subject: [PATCH 020/101] Try fixing RTD build --- .readthedocs.yaml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index afc42e1..4612bfd 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,19 +1,23 @@ version: 2 -sphinx: - configuration: docs/source/conf.py - build: os: "ubuntu-22.04" tools: python: "3.12" commands: # FIXME: dependencies 
should not be explicitly listed here! - - pip install snakemake pygama pylegendmeta sphinx-inline-tabs sphinx-copybutton furo myst-parser + - asdf plugin add uv + - asdf install uv latest + - asdf global uv latest + - uv venv + - uv pip install + snakemake pygama pylegendmeta sphinx-inline-tabs sphinx-copybutton furo myst-parser - rm -rf docs/source/api - - sphinx-apidoc + - .venv/bin/python -m sphinx.ext.apidoc --private --module-first --force --output-dir docs/source/api scripts + - .venv/bin/python -m sphinx -T -b html -d docs/_build/doctrees -D + language=en docs/source $READTHEDOCS_OUTPUT/html From 603f3ecbd14de0579420a262bcc5edd574af1204 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 27 Dec 2024 18:44:52 +0100 Subject: [PATCH 021/101] Bug fix --- .gitignore | 2 +- docs/Makefile | 8 +++++++- docs/source/conf.py | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 90d9198..4eb2181 100644 --- a/.gitignore +++ b/.gitignore @@ -77,7 +77,7 @@ instance/ .scrapy # Sphinx documentation -/docs/build/ +/docs/_build/ /docs/source/generated # PyBuilder diff --git a/docs/Makefile b/docs/Makefile index 9be493d..ff41907 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -3,7 +3,13 @@ SOURCEDIR = source BUILDDIR = build all: apidoc - sphinx-build -M html "$(SOURCEDIR)" "$(BUILDDIR)" -W --keep-going + sphinx-build \ + -T \ + -b html \ + -d "$(BUILDDIR)"/doctrees \ + -D language=en \ + -W --keep-going \ + "$(SOURCEDIR)" "$(BUILDDIR)" apidoc: clean-apidoc sphinx-apidoc \ diff --git a/docs/source/conf.py b/docs/source/conf.py index 013e65b..dfb1a23 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -4,7 +4,7 @@ import sys from pathlib import Path -sys.path.insert(0, Path(__file__).parents[2].resolve().as_posix() / "scripts") +sys.path.insert(0, Path(__file__).parents[2].resolve().as_posix()) project = "legend-dataflow" copyright = "2024, the LEGEND Collaboration" From 9f4d1c274102e8a5ab8f51a14a0c48dbec8d226b Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 27 Dec 2024 18:46:11 +0100 Subject: [PATCH 022/101] Remove unneeded sphinx ext --- docs/source/conf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index dfb1a23..92ee6c2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -17,7 +17,6 @@ "sphinx_copybutton", "sphinx_inline_tabs", "myst_parser", - "IPython.sphinxext.ipython_console_highlighting", ] source_suffix = { From 1152316bff97c4ff56d0a4624a1a39586d86ecfa Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 28 Dec 2024 17:09:44 +0100 Subject: [PATCH 023/101] add snakefile to profile --- profiles/build-raw/config.yaml | 1 + profiles/default/config.yaml | 1 + profiles/legend-data/config.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/profiles/build-raw/config.yaml b/profiles/build-raw/config.yaml index 32a0814..4525deb 100644 --- a/profiles/build-raw/config.yaml +++ b/profiles/build-raw/config.yaml @@ -4,5 +4,6 @@ max-jobs-per-second: 1 resources: - mem_swap=3500 configfile: config.json +snakefile: ./workflow/Snakefile-build-raw keep-going: true rerun-incomplete: true diff --git a/profiles/default/config.yaml b/profiles/default/config.yaml index 6b7ddb0..53a11cd 100644 --- a/profiles/default/config.yaml +++ b/profiles/default/config.yaml @@ -1,4 +1,5 @@ cores: all configfile: config.json +snakefile: ./workflow/Snakefile keep-going: true rerun-incomplete: true diff --git a/profiles/legend-data/config.yaml b/profiles/legend-data/config.yaml index 782e4df..364bdb1 
100644 --- a/profiles/legend-data/config.yaml +++ b/profiles/legend-data/config.yaml @@ -4,5 +4,6 @@ max-jobs-per-second: 1 resources: - mem_swap=3500 configfile: config.json +snakefile: ./workflow/Snakefile keep-going: true rerun-incomplete: true From 24fb2ed6907c2b66abd68822a0c977ed200b7b0c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 28 Dec 2024 17:10:18 +0100 Subject: [PATCH 024/101] add table format to config --- templates/config.json | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/templates/config.json b/templates/config.json index d8189ee..0d801ba 100644 --- a/templates/config.json +++ b/templates/config.json @@ -50,6 +50,18 @@ "cache": "$_/software/python/cache" }, + "table_format": { + "raw": "ch{ch:07d}/raw", + "dsp": "ch{ch:07d}/dsp", + "psp": "ch{ch:07d}/dsp", + "hit": "ch{ch:07d}/hit", + "pht": "ch{ch:07d}/hit", + "evt": "{grp}/evt", + "pet": "{grp}/evt", + "skm": "{grp}/skm", + "tcm": "hardware_tcm_1" + }, + "execenv": { "cmd": "apptainer run", "arg": "/data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif", From c89b634fba5cc0bd42d03a9cac2e54933f19ac9e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 28 Dec 2024 17:11:07 +0100 Subject: [PATCH 025/101] update to cal_groupings file --- Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index 0174479..fd14ffb 100644 --- a/Snakefile +++ b/Snakefile @@ -46,7 +46,7 @@ chan_maps = chan_map_path(setup) meta = metadata_path(setup) det_status = det_status_path(setup) swenv = runcmd(setup) -part = ds.CalGrouping(setup, Path(det_status) / "cal_partitions.yaml") +part = ds.CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") basedir = workflow.basedir From 83fc32991810e4f3c47aa4857d420298aee17054 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 28 Dec 2024 20:13:11 +0100 Subject: [PATCH 026/101] add pyproject file --- .readthedocs.yaml | 3 +-- LICENSE.md | 4 ++- pyproject.toml | 67 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 pyproject.toml diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 4612bfd..ca8910f 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -10,8 +10,7 @@ build: - asdf install uv latest - asdf global uv latest - uv venv - - uv pip install - snakemake pygama pylegendmeta sphinx-inline-tabs sphinx-copybutton furo myst-parser + - uv pip install .[docs] - rm -rf docs/source/api - .venv/bin/python -m sphinx.ext.apidoc --private diff --git a/LICENSE.md b/LICENSE.md index c4148f9..35d8ee3 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,9 +1,11 @@ -The legend-dataflow-hades package is licensed under the MIT "Expat" License: +The legend-dataflow package is licensed under the MIT "Expat" License: > Copyright (c) 2021: > > Matteo Agostini > Oliver Schulz +> George Marshall +> Luigi Pertoldi > > Permission is hereby granted, free of charge, to any person obtaining a copy > of this software and associated documentation files (the "Software"), to deal diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..53060c4 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,67 @@ +[tool.uv] +package = false + +[tool.uv.workspace] +exclude = ["rules", "templates", "scripts", "generated", "inputs", "software", "workflow"] + +[tool.setuptools] +py-modules = [] + +[project] +name = "legend-dataflow" +description = "Python package for processing L200 data" +authors = [ + {name = "George Marshall", email = "george.marshall.20@ucl.ac.uk"}, 
+ {name = "Luigi Pertoldi", email = "gipert@pm.me"}, + {name = "The Legend Collaboration"}, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT Expat License", + "Operating System :: MacOS", + "Operating System :: POSIX", + "Operating System :: Unix", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering", +] +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "dbetto>=1.0.5", + "snakemake>=8", +] +dynamic = [ + "version", +] + +[project.optional-dependencies] +no_container = [ + "pygama", + "dspeed", + "pylegendmeta", + "legend-pydataobj", + "legend-daq2lh5", +] +test = [ + "legend-dataflow[no_container]", + "pytest >=6", + "pytest-cov >=3", +] +dev = [ + "legend-dataflow[no_container]", + "pytest >=6", + "pytest-cov >=3", +] +docs = [ + "legend-dataflow[no_container]", + "sphinx>=7.0", + "myst_parser>=0.13", + "sphinx_inline_tabs", + "sphinx_copybutton", + "sphinx_autodoc_typehints", + "furo>=2023.08.17", +] From 7cd02734d919a2dcab5d8dc4d27e42f060147f9f Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Dec 2024 16:27:10 +0100 Subject: [PATCH 027/101] add logging config and cleanup config loading --- scripts/build_dsp.py | 22 ++++++++++----- scripts/build_evt.py | 41 +++++++++++++-------------- scripts/build_hit.py | 33 ++++++++++++++-------- scripts/build_raw.py | 20 ++++++++++++-- scripts/build_raw_blind.py | 35 +++++++++++++---------- scripts/build_skm.py | 35 +++++++++++++---------- scripts/build_tcm.py | 24 +++++++++------- scripts/check_blinding.py | 28 +++++++++++-------- scripts/pars_dsp_build_svm.py | 29 +++++++++++++++---- scripts/pars_dsp_dplms.py | 25 +++++++++++------ scripts/pars_dsp_eopt.py | 35 +++++++++++------------ scripts/pars_dsp_event_selection.py | 36 ++++++++++++------------ scripts/pars_dsp_nopt.py | 35 +++++++++++------------ scripts/pars_dsp_svm.py | 14 ---------- scripts/pars_dsp_tau.py | 26 ++++++++++------- scripts/pars_hit_aoe.py | 31 +++++++++++---------- scripts/pars_hit_ecal.py | 43 ++++++++++++++++------------- scripts/pars_hit_lq.py | 29 +++++++++++-------- scripts/pars_hit_qc.py | 29 +++++++++++-------- scripts/pars_pht_aoecal.py | 30 +++++++++++--------- scripts/pars_pht_fast.py | 34 +++++++++++++---------- scripts/pars_pht_lqcal.py | 30 +++++++++++--------- scripts/pars_pht_partcal.py | 30 +++++++++++--------- scripts/pars_pht_qc.py | 31 ++++++++++++--------- scripts/pars_pht_qc_phy.py | 30 ++++++++++++-------- scripts/pars_tcm_pulser.py | 32 ++++++++++++--------- 26 files changed, 454 insertions(+), 333 deletions(-) diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index c505058..f028ea6 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -52,17 +52,25 @@ def replace_list_with_array(dic): msg = f"Tier {args.tier} not supported" raise ValueError(msg) + +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, 
filemode="w") + log = logging.getLogger(__name__) + channel_dict = config_dict["inputs"]["processing_chain"] settings_dict = config_dict["options"].get("settings", {}) if isinstance(settings_dict, str): settings_dict = Props.read_from(settings_dict) -log_config = config_dict["options"]["logging"] - -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -log_config = Props.read_from(log_config) -log_config["handlers"]["file"]["filename"] = args.log -logging.config.dictConfig(log_config) -log = logging.getLogger("test") meta = LegendMetadata(path=args.metadata) chan_map = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index a02d9f8..89fd215 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -1,6 +1,7 @@ import argparse import json import logging +import logging.config import time from pathlib import Path @@ -38,43 +39,43 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): argparser.add_argument("--xtc_file", help="xtc file", type=str) argparser.add_argument("--par_files", help="par files", nargs="*") -argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--tier", help="Tier", type=str, required=True) +argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) - argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--output", help="output file", type=str) args = argparser.parse_args() -if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -else: - logging.basicConfig(level=logging.DEBUG) - -logging.getLogger("legendmeta").setLevel(logging.INFO) -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - # load in config configs = TextDB(args.configs, lazy=True) if args.tier in ("evt", "pet"): - config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_evt"][ - "inputs" - ] - evt_config_file = config_dict["evt_config"] + rule_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_evt"] + else: msg = "unknown tier" raise ValueError(msg) +config_dict = rule_dict["inputs"] +evt_config_file = config_dict["evt_config"] + +if "logging" in rule_dict["options"]: + log_config = rule_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(rule_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + meta = LegendMetadata(args.metadata, lazy=True) chmap = meta.channelmap(args.timestamp) diff --git a/scripts/build_hit.py b/scripts/build_hit.py index 3aba4aa..4daa2e5 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -1,5 +1,6 
@@ import argparse import logging +import logging.config import time from pathlib import Path @@ -24,24 +25,32 @@ argparser.add_argument("--db_file", help="db file", type=str) args = argparser.parse_args() -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - configs = TextDB(args.configs, lazy=True) if args.tier == "hit" or args.tier == "pht": - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_hit"][ - "inputs" - ]["hit_config"] + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_hit"] else: msg = "unknown tier" raise ValueError(msg) +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + +channel_dict = config_dict["inputs"]["hit_config"] +settings_dict = config_dict["options"].get("settings", {}) +if isinstance(settings_dict, str): + settings_dict = Props.read_from(settings_dict) + meta = LegendMetadata(path=args.metadata) chan_map = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/build_raw.py b/scripts/build_raw.py index 03a4fca..081768f 100644 --- a/scripts/build_raw.py +++ b/scripts/build_raw.py @@ -23,9 +23,23 @@ Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"][ - "inputs" -] +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] + +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + +channel_dict = config_dict["inputs"] settings = Props.read_from(channel_dict["settings"]) channel_dict = channel_dict["out_spec"] all_config = Props.read_from(channel_dict["gen_config"]) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 33a6c31..1405ecd 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -34,21 +34,26 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -Path(args.log).parent.makedir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") 
-logging.getLogger("lgdo").setLevel(logging.INFO) - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - configs = TextDB(args.configs, lazy=True) -channel_dict = configs.on(args.timestamp, system=args.datatype) - -hdf_settings = Props.read_from(channel_dict["snakemake_rules"]["tier_raw"]["inputs"]["settings"])[ - "hdf5_settings" -] -blinding_settings = Props.read_from( - channel_dict["snakemake_rules"]["tier_raw_blind"]["inputs"]["config"] -) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] + +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + +channel_dict = config_dict["inputs"] +hdf_settings = Props.read_from(config_dict["settings"])["hdf5_settings"] +blinding_settings = Props.read_from(config_dict["config"]) centroid = blinding_settings["centroid_in_keV"] # keV width = blinding_settings["width_in_keV"] # keV @@ -115,6 +120,7 @@ rng = np.random.default_rng() rand_num = f"{rng.integers(0,99999):05d}" temp_output = f"{args.output}.{rand_num}" +Path(temp_output).parent.mkdir(parents=True, exist_ok=True) for channel in all_channels: try: @@ -166,4 +172,5 @@ ) # rename the temp file +Path(args.output).parent.mkdir(parents=True, exist_ok=True) Path(temp_output).rename(args.output) diff --git a/scripts/build_skm.py b/scripts/build_skm.py index 10bf876..058025a 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -30,22 +30,27 @@ def get_all_out_fields(input_table, out_fields, current_field=""): argparser.add_argument("--output", help="output file", required=True) args = argparser.parse_args() -if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - # load in config -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -skm_config_file = configs["snakemake_rules"]["tier_skm"]["inputs"]["skm_config"] - +config_dict = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype)[ + "snakemake_rules" +]["tier_skm"] + +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) + + +skm_config_file = config_dict["inputs"]["skm_config"] evt_filter = 
Props.read_from(skm_config_file)["evt_filter"] out_fields = Props.read_from(skm_config_file)["keep_fields"] diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index faa39d6..7f9c4a9 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -21,21 +21,25 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["tier_tcm"] -log_config = config_dict["options"]["logging"] - -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -log_config = Props.read_from(log_config) -log_config["handlers"]["file"]["filename"] = args.log -logging.config.dictConfig(log_config) -log = logging.getLogger("test") - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) settings = Props.read_from(config_dict["inputs"]["config"]) rng = np.random.default_rng() temp_output = f"{args.output}.{rng.integers(0, 99999):05d}" +Path(args.output).parent.mkdir(parents=True, exist_ok=True) # get the list of channels by fcid ch_list = lh5.ls(args.input, "/ch*") diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index 7d6da04..bf2ca93 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -15,16 +15,13 @@ import matplotlib.pyplot as plt import numexpr as ne import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 -from lgdo.utils import numba_defaults from pygama.math.histogram import get_hist from pygama.pargen.energy_cal import get_i_local_maxima mpl.use("Agg") -numba_defaults.cache = False -numba_defaults.boundscheck = False argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="files", nargs="*", type=str) @@ -39,14 +36,21 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -Path(args.log).parent.makedir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -log = logging.getLogger(__name__) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["tier_raw_blindcheck"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, 
filename=args.log, filemode="w") + log = logging.getLogger(__name__) # get the usability status for this channel chmap = LegendMetadata(args.metadata, lazy=True).channelmap(args.timestamp).map("daq.rawid") diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index 0d6ada7..67607bb 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -3,23 +3,40 @@ import pickle as pkl from pathlib import Path +from legendmeta import TextDB from legendmeta.catalog import Props from lgdo import lh5 from sklearn.svm import SVC argparser = argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) +argparser.add_argument("--configs", help="config file", type=str) + +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) +argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--output_file", help="output SVM file", type=str, required=True) argparser.add_argument("--train_data", help="input data file", type=str, required=True) argparser.add_argument("--train_hyperpars", help="input hyperparameter file", required=True) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) - -log = logging.getLogger(__name__) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) # Load files tb = lh5.read("ml_train/dsp", args.train_data) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 87403b8..8806dbd 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -6,7 +6,7 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import Array, Table from pygama.pargen.dplms_ge_dict import dplms_ge_dict @@ -31,14 +31,21 @@ args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + 
if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) log = logging.getLogger(__name__) sto = lh5.LH5Store() diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index d4f0098..9b4e092 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -10,7 +10,7 @@ import pygama.pargen.energy_optimisation as om # noqa: F401 import sklearn.gaussian_process.kernels as ker from dspeed.units import unit_registry as ureg -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.math.distributions import hpge_peak from pygama.pargen.dsp_optimize import ( @@ -44,17 +44,22 @@ argparser.add_argument("--plot_save_path", help="plot_save_path", type=str, required=False) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) - +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_eopt"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) -log = logging.getLogger(__name__) sto = lh5.LH5Store() t0 = time.time() @@ -62,12 +67,8 @@ channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -conf = LegendMetadata(path=args.configs) -configs = conf.on(args.timestamp, system=args.datatype) -dsp_config = configs["snakemake_rules"]["pars_dsp_eopt"]["inputs"]["processing_chain"][ - args.channel -] -opt_json = configs["snakemake_rules"]["pars_dsp_eopt"]["inputs"]["optimiser_config"][args.channel] +dsp_config = config_dict["inputs"]["processing_chain"][args.channel] +opt_json = config_dict["inputs"]["optimiser_config"][args.channel] opt_dict = Props.read_from(opt_json) db_dict = Props.read_from(args.decay_const) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index f4dfd7d..7cbabcc 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -11,7 +11,7 @@ import numpy as np import pygama.math.histogram as pgh import pygama.pargen.energy_cal as pgc -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from 
legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp @@ -96,16 +96,22 @@ def get_out_data( argparser.add_argument("--peak_file", help="peak_file", type=str, required=True) args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) - logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_peak_selection"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) - log = logging.getLogger(__name__) sto = lh5.LH5Store() t0 = time.time() @@ -113,14 +119,8 @@ def get_out_data( channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - conf = LegendMetadata(path=args.configs) - configs = conf.on(args.timestamp, system=args.datatype) - dsp_config = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"][ - "processing_chain" - ][args.channel] - peak_json = configs["snakemake_rules"]["pars_dsp_peak_selection"]["inputs"]["peak_config"][ - args.channel - ] + dsp_config = config_dict["inputs"]["processing_chain"][args.channel] + peak_json = config_dict["inputs"]["peak_config"][args.channel] peak_dict = Props.read_from(peak_json) db_dict = Props.read_from(args.decay_const) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 5de3a59..9cc96e2 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -7,7 +7,7 @@ import lgdo.lh5 as lh5 import numpy as np import pygama.pargen.noise_optimization as pno -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp @@ -32,15 +32,21 @@ args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) -logging.getLogger("dspeed.processing_chain").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) - -log = logging.getLogger(__name__) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] +if "logging" in config_dict["options"]: + log_config = 
config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) t0 = time.time() @@ -49,15 +55,10 @@ channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -conf = LegendMetadata(path=args.configs) -configs = conf.on(args.timestamp, system=args.datatype) -dsp_config = configs["snakemake_rules"]["pars_dsp_nopt"]["inputs"]["processing_chain"][ - args.channel -] -opt_json = configs["snakemake_rules"]["pars_dsp_nopt"]["inputs"]["optimiser_config"][args.channel] +dsp_config = config_dict["inputs"]["processing_chain"][args.channel] +opt_json = config_dict["inputs"]["optimiser_config"][args.channel] opt_dict = Props.read_from(opt_json) - db_dict = Props.read_from(args.database) if opt_dict.pop("run_nopt") is True: diff --git a/scripts/pars_dsp_svm.py b/scripts/pars_dsp_svm.py index 370e320..359bc3f 100644 --- a/scripts/pars_dsp_svm.py +++ b/scripts/pars_dsp_svm.py @@ -1,5 +1,4 @@ import argparse -import logging from pathlib import Path from legendmeta.catalog import Props @@ -11,19 +10,6 @@ argparser.add_argument("--svm_file", help="svm file", required=True) args = argparser.parse_args() - -if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -else: - logging.basicConfig(level=logging.DEBUG) - -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - par_data = Props.read_from(args.input_file) file = f"'$_/{Path(args.svm_file).name}'" diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index b8d9a71..a3a3183 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -6,7 +6,7 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp @@ -32,15 +32,21 @@ sto = lh5.LH5Store() -configs = LegendMetadata(path=args.configs) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["pars_dsp_tau"] -log_config = config_dict["options"]["logging"] - -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -log_config = Props.read_from(log_config) -log_config["handlers"]["file"]["filename"] = args.log -logging.config.dictConfig(log_config) -log = logging.getLogger("test") +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = 
logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index c30c7ef..6924b39 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak @@ -17,7 +17,6 @@ from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -125,23 +124,27 @@ def aoe_calibration( argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_hit_aoecal"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -configs = LegendMetadata(path=args.configs) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_hit_aoecal" -]["inputs"]["aoecal_config"][args.channel] - +channel_dict = config_dict["inputs"]["aoecal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 43ba644..c16f75c 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -14,7 +14,7 @@ import numpy as np import pygama.math.distributions as pgf import pygama.math.histogram as pgh -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from matplotlib.colors import LogNorm from pygama.math.distributions import nb_poly @@ -443,13 +443,28 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, 
filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] + if args.tier == "hit": + config_dict = config_dict["pars_hit_ecal"] + elif args.tier == "pht": + config_dict = config_dict["pars_pht_ecal"] + else: + msg = "invalid tier" + raise ValueError(msg) + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) @@ -470,17 +485,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): hit_dict.update(database_dic[channel]["ctc_params"]) - # get metadata dictionary - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - if args.tier == "hit": - channel_dict = channel_dict["pars_hit_ecal"]["inputs"]["ecal_config"][args.channel] - elif args.tier == "pht": - channel_dict = channel_dict["pars_pht_ecal"]["inputs"]["ecal_config"][args.channel] - else: - msg = "invalid tier" - raise ValueError(msg) - + channel_dict = config_dict["inputs"]["ecal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) # convert plot functions from strings to functions and split off baseline and common plots diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 8625ed3..fbebbba 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 @@ -18,7 +18,6 @@ from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -144,22 +143,28 @@ def lq_calibration( argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_hit_lqcal"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = 
Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -configs = LegendMetadata(path=args.configs) -channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_hit_lqcal" -]["inputs"]["lqcal_config"][args.channel] +channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) ecal_dict = Props.read_from(args.ecal_file) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 320fee9..f0e681b 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -9,7 +9,7 @@ from pathlib import Path import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -53,23 +53,28 @@ argparser.add_argument("--save_path", help="save_path", type=str) args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_hit_qc"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" # get metadata dictionary - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - channel_dict = channel_dict["pars_hit_qc"]["inputs"]["qc_config"][args.channel] - + channel_dict = config_dict["inputs"]["qc_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) if args.overwrite_files: diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index ca938e5..74cf382 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -11,7 +11,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.AoE_cal 
import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak @@ -269,23 +269,27 @@ def eres_func(x): argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_aoecal"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_pht_aoecal" - ]["inputs"]["par_pht_aoecal_config"][args.channel] - + channel_dict = config_dict["inputs"]["par_pht_aoecal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) cal_dict = {} diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 104ad05..7f3a168 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pars_pht_aoecal import run_aoe_calibration from pars_pht_lqcal import run_lq_calibration @@ -18,7 +18,6 @@ from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -69,13 +68,21 @@ def run_splitter(files): argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"] + if "logging" in config_dict["pars_pht_partcal"]["options"]: + log_config = config_dict["pars_pht_partcal"]["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + 
log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["pars_pht_partcal"]["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) @@ -122,17 +129,14 @@ def run_splitter(files): timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(timestamp, system=args.datatype)["snakemake_rules"] - kwarg_dict = Props.read_from( - channel_dict["pars_pht_partcal"]["inputs"]["pars_pht_partcal_config"][args.channel] + config_dict["pars_pht_partcal"]["inputs"]["pars_pht_partcal_config"][args.channel] ) aoe_kwarg_dict = Props.read_from( - channel_dict["pars_pht_aoecal"]["inputs"]["par_pht_aoecal_config"][args.channel] + config_dict["pars_pht_aoecal"]["inputs"]["par_pht_aoecal_config"][args.channel] ) lq_kwarg_dict = Props.read_from( - channel_dict["pars_pht_lqcal"]["inputs"]["lqcal_config"][args.channel] + config_dict["pars_pht_lqcal"]["inputs"]["lqcal_config"][args.channel] ) params = [ diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 2c67745..862711b 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -10,7 +10,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 @@ -266,23 +266,27 @@ def eres_func(x): argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_lqcal"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "pars_pht_lqcal" - ]["inputs"]["lqcal_config"][args.channel] - + channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) cal_dict = {} diff --git 
a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index a454d76..1fad3d3 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -12,7 +12,7 @@ import pandas as pd import pygama.math.distributions as pgf import pygama.math.histogram as pgh -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.math.distributions import nb_poly from pygama.pargen.data_cleaning import get_tcm_pulser_ids @@ -429,13 +429,21 @@ def calibrate_partition( argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_partcal"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) @@ -482,11 +490,7 @@ def calibrate_partition( timestamp = fk.timestamp final_dict[timestamp] = sorted(filelist) - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(timestamp, system=args.datatype)["snakemake_rules"][ - "pars_pht_partcal" - ]["inputs"]["pars_pht_partcal_config"][args.channel] - + channel_dict = config_dict["inputs"]["pars_pht_partcal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) params = [ diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 495c87b..ac728d7 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -9,7 +9,7 @@ from pathlib import Path import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -57,22 +57,29 @@ ) args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_qc"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, 
exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" # get metadata dictionary - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - channel_dict = channel_dict["pars_pht_qc"]["inputs"]["qc_config"][args.channel] + channel_dict = config_dict["inputs"]["qc_config"][args.channel] + kwarg_dict = Props.read_from(channel_dict) # sort files in dictionary where keys are first timestamp from run if isinstance(args.cal_files, list): @@ -88,8 +95,6 @@ np.unique(cal_files) ) # need this as sometimes files get double counted as it somehow puts in the p%-* filelist and individual runs also - kwarg_dict = Props.read_from(channel_dict) - if args.overwrite_files: overwrite = Props.read_from(args.overwrite_files) if channel in overwrite: diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 4f87afb..e308f5e 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -10,7 +10,7 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -45,22 +45,29 @@ ) args = argparser.parse_args() - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") - logging.getLogger("numba").setLevel(logging.INFO) - logging.getLogger("parse").setLevel(logging.INFO) - logging.getLogger("lgdo").setLevel(logging.INFO) - logging.getLogger("h5py").setLevel(logging.INFO) - logging.getLogger("matplotlib").setLevel(logging.INFO) - logging.getLogger("legendmeta").setLevel(logging.INFO) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_pht_qc"] + if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{chmap[args.channel].daq.rawid:07}" # get metadata dictionary - configs = LegendMetadata(path=args.configs) - channel_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] - channel_dict = channel_dict["pars_pht_qc"]["inputs"]["qc_config"][args.channel] + channel_dict = config_dict["qc_config"][args.channel] + kwarg_dict = Props.read_from(channel_dict) sto = lh5.LH5Store() @@ -90,7 +97,6 @@ puls = sto.read("ch1027201/dsp/", phy_files, field_mask=["trapTmax"])[0] bl_mask = ((bls["wf_max"].nda - 
bls["bl_mean"].nda) > 1000) & (puls["trapTmax"].nda < 200) - kwarg_dict = Props.read_from(channel_dict) kwarg_dict_fft = kwarg_dict["fft_fields"] cut_fields = get_keys( diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index 9e6ad42..018e386 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -4,7 +4,7 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata +from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import get_tcm_pulser_ids @@ -22,27 +22,33 @@ argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) args = argparser.parse_args() -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -logging.getLogger("legendmeta").setLevel(logging.INFO) +configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +config_dict = configs["snakemake_rules"]["pars_tcm_pulser"] +if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["file"]["filename"] = args.log + logging.config.dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) +else: + if args.log is not None: + Path(args.log).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + log = logging.getLogger(__name__) sto = lh5.LH5Store() log = logging.getLogger(__name__) -configs = LegendMetadata(path=args.configs) -config_dict = configs.on(args.timestamp, system=args.datatype) -kwarg_dict = config_dict["snakemake_rules"]["pars_tcm_pulser"]["inputs"]["pulser_config"] + +kwarg_dict = config_dict["inputs"]["pulser_config"] +kwarg_dict = Props.read_from(kwarg_dict) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid}" -kwarg_dict = Props.read_from(kwarg_dict) - if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": tcm_files = args.tcm_files[0] with Path(tcm_files).open() as f: From 59e273b9fabdb4c51276ceeee4c34328a5481a0c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Dec 2024 16:28:25 +0100 Subject: [PATCH 028/101] add param info to svm rule --- rules/dsp.smk | 174 ++++++++++++++++++++++++++------------------------ rules/psp.smk | 8 +++ 2 files changed, 100 insertions(+), 82 deletions(-) diff --git a/rules/dsp.smk b/rules/dsp.smk index 7ae67a7..4d70945 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -234,6 +234,10 @@ rule build_svm_dsp: train_data=lambda wildcards: str( get_input_par_file(wildcards, "dsp", "svm_hyperpars") ).replace("hyperpars.yaml", "train.lh5"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: @@ -246,6 +250,10 @@ rule build_svm_dsp: "{swenv} python3 -B " "{basedir}/../scripts/pars_dsp_build_svm.py " "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " 
"--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " "--output_file {output.dsp_pars}" @@ -363,85 +371,87 @@ rule build_pars_dsp_db: "--channelmap {meta} " -# rule build_pars_dsp: -# input: -# in_files=lambda wildcards: get_par_chanlist( -# setup, -# f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", -# "dsp", -# basedir, -# det_status, -# chan_maps, -# name="dplms", -# extension="lh5", -# ), -# in_db=get_pattern_pars_tmp( -# setup, -# "dsp", -# datatype="cal", -# ), -# plts=get_pattern_plts(setup, "dsp"), -# objects=get_pattern_pars( -# setup, -# "dsp", -# name="objects", -# extension="dir", -# check_in_cycle=check_in_cycle, -# ), -# params: -# timestamp="{timestamp}", -# datatype="cal", -# output: -# out_file=get_pattern_pars( -# setup, -# "dsp", -# extension="lh5", -# check_in_cycle=check_in_cycle, -# ), -# out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), -# group: -# "merge-dsp" -# shell: -# "{swenv} python3 -B " -# "{basedir}/../scripts/merge_channels.py " -# "--output {output.out_file} " -# "--in_db {input.in_db} " -# "--out_db {output.out_db} " -# "--input {input.in_files} " -# "--timestamp {params.timestamp} " -# "--channelmap {meta} " -# rule build_dsp: -# input: -# raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), -# pars_file=ancient( -# lambda wildcards: ParsCatalog.get_par_file( -# setup, wildcards.timestamp, "dsp" -# ) -# ), -# params: -# timestamp="{timestamp}", -# datatype="{datatype}", -# ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, -# output: -# tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), -# db_file=get_pattern_pars_tmp(setup, "dsp_db"), -# log: -# get_pattern_log(setup, "tier_dsp"), -# group: -# "tier-dsp" -# resources: -# runtime=300, -# mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, -# shell: -# "{swenv} python3 -B " -# "{basedir}/../scripts/build_dsp.py " -# "--log {log} " -# "--tier dsp " -# f"--configs {ro(configs)} " -# "--metadata {meta} " -# "--datatype {params.datatype} " -# "--timestamp {params.timestamp} " -# "--input {params.ro_input[raw_file]} " -# "--output {output.tier_file} " -# "--db_file {output.db_file} " -# "--pars_file {params.ro_input[pars_file]} " +rule build_pars_dsp: + input: + in_files=lambda wildcards: get_par_chanlist( + setup, + f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", + "dsp", + basedir, + det_status, + chan_maps, + name="dplms", + extension="lh5", + ), + in_db=get_pattern_pars_tmp( + setup, + "dsp", + datatype="cal", + ), + plts=get_pattern_plts(setup, "dsp"), + objects=get_pattern_pars( + setup, + "dsp", + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + params: + timestamp="{timestamp}", + datatype="cal", + output: + out_file=get_pattern_pars( + setup, + "dsp", + extension="lh5", + check_in_cycle=check_in_cycle, + ), + out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), + group: + "merge-dsp" + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/merge_channels.py " + "--output {output.out_file} " + "--in_db {input.in_db} " + "--out_db {output.out_db} " + "--input {input.in_files} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " + + +rule build_dsp: + input: + raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), + pars_file=ancient( + lambda wildcards: ParsCatalog.get_par_file( + setup, wildcards.timestamp, "dsp" + ) + ), + params: + 
timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, + output: + tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(setup, "dsp_db"), + log: + get_pattern_log(setup, "tier_dsp"), + group: + "tier-dsp" + resources: + runtime=300, + mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/build_dsp.py " + "--log {log} " + "--tier dsp " + f"--configs {ro(configs)} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--input {params.ro_input[raw_file]} " + "--output {output.tier_file} " + "--db_file {output.db_file} " + "--pars_file {params.ro_input[pars_file]} " diff --git a/rules/psp.smk b/rules/psp.smk index 9fc0861..5505f27 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -185,6 +185,10 @@ rule build_svm_psp: train_data=lambda wildcards: str( get_input_par_file(wildcards, "psp", "svm_hyperpars") ).replace("hyperpars.yaml", "train.lh5"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: @@ -197,6 +201,10 @@ rule build_svm_psp: "{swenv} python3 -B " "{basedir}/../scripts/pars_dsp_build_svm.py " "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " "--output_file {output.dsp_pars}" From 2cc123246f58eb9b06eeb37ad7eb2b31ee98bed2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 8 Jan 2025 17:04:34 +0100 Subject: [PATCH 029/101] move logging to function --- scripts/build_dsp.py | 18 ++---------------- scripts/build_evt.py | 21 ++++----------------- scripts/build_hit.py | 17 ++--------------- scripts/build_raw.py | 15 ++------------- scripts/build_raw_blind.py | 16 ++-------------- scripts/build_skm.py | 17 ++--------------- scripts/build_tcm.py | 18 +++--------------- scripts/check_blinding.py | 17 +++-------------- scripts/pars_dsp_build_svm.py | 19 +++---------------- scripts/pars_dsp_dplms.py | 22 ++++++---------------- scripts/pars_dsp_eopt.py | 17 +++-------------- scripts/pars_dsp_event_selection.py | 17 +++-------------- scripts/pars_dsp_nopt.py | 17 +++-------------- scripts/pars_dsp_tau.py | 18 +++--------------- scripts/pars_hit_aoe.py | 17 +++-------------- scripts/pars_hit_ecal.py | 16 +++------------- scripts/pars_hit_lq.py | 17 +++-------------- scripts/pars_hit_qc.py | 16 +++------------- scripts/pars_pht_aoecal.py | 16 +++------------- scripts/pars_pht_fast.py | 17 +++-------------- scripts/pars_pht_lqcal.py | 18 +++--------------- scripts/pars_pht_partcal.py | 18 +++--------------- scripts/pars_pht_qc.py | 16 +++------------- scripts/pars_pht_qc_phy.py | 16 +++------------- scripts/pars_tcm_pulser.py | 16 +++------------- scripts/util/log.py | 28 ++++++++++++++++++++++++++++ 26 files changed, 102 insertions(+), 358 deletions(-) create mode 100644 scripts/util/log.py diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index f028ea6..f7b4141 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -1,6 +1,4 @@ import argparse -import logging -import logging.config import re import time import warnings @@ -11,6 +9,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 +from utils.log import build_log def replace_list_with_array(dic): 
@@ -52,20 +51,7 @@ def replace_list_with_array(dic): msg = f"Tier {args.tier} not supported" raise ValueError(msg) - -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) channel_dict = config_dict["inputs"]["processing_chain"] settings_dict = config_dict["options"].get("settings", {}) diff --git a/scripts/build_evt.py b/scripts/build_evt.py index 89fd215..e56912b 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -1,16 +1,15 @@ import argparse import json -import logging -import logging.config import time from pathlib import Path import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import Props, TextDB +from legendmeta import LegendMetadata from lgdo.types import Array from pygama.evt import build_evt +from util.log import build_log sto = lh5.LH5Store() @@ -62,19 +61,7 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): config_dict = rule_dict["inputs"] evt_config_file = config_dict["evt_config"] -if "logging" in rule_dict["options"]: - log_config = rule_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(rule_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(rule_dict, args.log) meta = LegendMetadata(args.metadata, lazy=True) chmap = meta.channelmap(args.timestamp) diff --git a/scripts/build_hit.py b/scripts/build_hit.py index 4daa2e5..cec39b7 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -1,6 +1,4 @@ import argparse -import logging -import logging.config import time from pathlib import Path @@ -8,6 +6,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from pygama.hit.build_hit import build_hit +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) @@ -32,19 +31,7 @@ msg = "unknown tier" raise ValueError(msg) -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) channel_dict = 
config_dict["inputs"]["hit_config"] settings_dict = config_dict["options"].get("settings", {}) diff --git a/scripts/build_raw.py b/scripts/build_raw.py index 081768f..7e1dd1b 100644 --- a/scripts/build_raw.py +++ b/scripts/build_raw.py @@ -6,6 +6,7 @@ from daq2lh5 import build_raw from legendmeta import TextDB from legendmeta.catalog import Props +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) @@ -25,19 +26,7 @@ configs = TextDB(args.configs, lazy=True) config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) channel_dict = config_dict["inputs"] settings = Props.read_from(channel_dict["settings"]) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 1405ecd..0957c7a 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -11,7 +11,6 @@ """ import argparse -import logging from pathlib import Path import numexpr as ne @@ -19,6 +18,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) @@ -37,19 +37,7 @@ configs = TextDB(args.configs, lazy=True) config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) channel_dict = config_dict["inputs"] hdf_settings = Props.read_from(config_dict["settings"])["hdf5_settings"] diff --git a/scripts/build_skm.py b/scripts/build_skm.py index 058025a..c8ff972 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -1,12 +1,11 @@ import argparse -import logging -from pathlib import Path import awkward as ak from legendmeta import TextDB from legendmeta.catalog import Props from lgdo import lh5 from lgdo.types import Array, Struct, Table, VectorOfVectors +from utils.log import build_log def get_all_out_fields(input_table, out_fields, current_field=""): @@ -35,19 +34,7 @@ def get_all_out_fields(input_table, out_fields, current_field=""): "snakemake_rules" ]["tier_skm"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - 
Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +log = build_log(config_dict, args.log) skm_config_file = config_dict["inputs"]["skm_config"] diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index 7f9c4a9..3ddf5dd 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -1,6 +1,4 @@ import argparse -import logging -import logging.config from pathlib import Path import lgdo.lh5 as lh5 @@ -9,6 +7,7 @@ from legendmeta import TextDB from legendmeta.catalog import Props from pygama.evt.build_tcm import build_tcm +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) @@ -21,19 +20,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["tier_tcm"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) settings = Props.read_from(config_dict["inputs"]["config"]) diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index bf2ca93..44261a5 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -7,7 +7,6 @@ """ import argparse -import logging import pickle as pkl from pathlib import Path @@ -20,6 +19,7 @@ from lgdo import lh5 from pygama.math.histogram import get_hist from pygama.pargen.energy_cal import get_i_local_maxima +from utils.log import build_log mpl.use("Agg") @@ -38,19 +38,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["tier_raw_blindcheck"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) # get the usability status for this channel chmap = LegendMetadata(args.metadata, lazy=True).channelmap(args.timestamp).map("daq.rawid") diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index 67607bb..a31a8c1 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -1,5 +1,4 @@ import argparse -import logging import pickle as pkl from pathlib import Path @@ -7,6 
+6,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from sklearn.svm import SVC +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) @@ -14,8 +14,6 @@ argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--output_file", help="output SVM file", type=str, required=True) argparser.add_argument("--train_data", help="input data file", type=str, required=True) @@ -24,19 +22,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) # Load files tb = lh5.read("ml_train/dsp", args.train_data) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 8806dbd..64c7a9f 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -10,6 +10,7 @@ from legendmeta.catalog import Props from lgdo import Array, Table from pygama.pargen.dplms_ge_dict import dplms_ge_dict +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) @@ -32,20 +33,9 @@ args = argparser.parse_args() configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) +config_dict = configs["snakemake_rules"]["pars_dsp_dplms"] + +log = build_log(config_dict, args.log) log = logging.getLogger(__name__) sto = lh5.LH5Store() @@ -55,9 +45,9 @@ channel = f"ch{channel_dict[args.channel].daq.rawid:07}" configs = LegendMetadata(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -dsp_config = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["proc_chain"][args.channel] +dsp_config = config_dict["inputs"]["proc_chain"][args.channel] -dplms_json = configs["snakemake_rules"]["pars_dsp_dplms"]["inputs"]["dplms_pars"][args.channel] +dplms_json = config_dict["inputs"]["dplms_pars"][args.channel] dplms_dict = Props.read_from(dplms_json) db_dict = Props.read_from(args.database) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py 
index 9b4e092..5e9a009 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -1,5 +1,4 @@ import argparse -import logging import pickle as pkl import time import warnings @@ -18,6 +17,7 @@ run_bayesian_optimisation, run_one_dsp, ) +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -46,19 +46,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_eopt"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) sto = lh5.LH5Store() t0 = time.time() diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 7cbabcc..9999134 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -1,6 +1,5 @@ import argparse import json -import logging import time import warnings from bisect import bisect_left @@ -15,6 +14,7 @@ from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -98,19 +98,8 @@ def get_out_data( configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_peak_selection"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) sto = lh5.LH5Store() t0 = time.time() diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 9cc96e2..85883b8 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -1,5 +1,4 @@ import argparse -import logging import pickle as pkl import time from pathlib import Path @@ -11,6 +10,7 @@ from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp +from utils.log import build_log sto = lh5.LH5Store() @@ -34,19 +34,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - 
Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) t0 = time.time() diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index a3a3183..4f3cf9d 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -1,6 +1,4 @@ import argparse -import logging -import logging.config import pickle as pkl from pathlib import Path @@ -11,6 +9,7 @@ from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.extract_tau import ExtractTau +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) @@ -34,19 +33,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index 6924b39..4d3f503 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -1,7 +1,6 @@ from __future__ import annotations import argparse -import logging import pickle as pkl import warnings from pathlib import Path @@ -16,6 +15,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -126,19 +126,8 @@ def aoe_calibration( configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_hit_aoecal"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_hit_ecal.py 
b/scripts/pars_hit_ecal.py index c16f75c..aab5f41 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -23,6 +23,7 @@ from pygama.pargen.utils import load_data from scipy.stats import binned_statistic from util.convert_np import convert_dict_np_to_float +from utils.log import build_log log = logging.getLogger(__name__) mpl.use("agg") @@ -452,19 +453,8 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): else: msg = "invalid tier" raise ValueError(msg) - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index fbebbba..3487c38 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -1,7 +1,6 @@ from __future__ import annotations import argparse -import logging import pickle as pkl import warnings from pathlib import Path @@ -17,6 +16,7 @@ from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -145,19 +145,8 @@ def lq_calibration( configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_hit_lqcal"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) channel_dict = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index f0e681b..6b3369f 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -19,6 +19,7 @@ ) from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float +from utils.log import build_log log = logging.getLogger(__name__) @@ -55,19 +56,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_hit_qc"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = 
logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index 74cf382..91ae176 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -18,6 +18,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey +from utils.log import build_log log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -271,19 +272,8 @@ def eres_func(x): configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_aoecal"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 7f3a168..b8d48d2 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -2,7 +2,6 @@ import argparse import json -import logging import pickle as pkl import warnings from pathlib import Path @@ -17,6 +16,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey +from utils.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -70,19 +70,8 @@ def run_splitter(files): configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"] - if "logging" in config_dict["pars_pht_partcal"]["options"]: - log_config = config_dict["pars_pht_partcal"]["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["pars_pht_partcal"]["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict["pars_pht_partcal"], args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 862711b..101acea 100644 --- 
a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -3,7 +3,6 @@ import argparse import copy import json -import logging import pickle as pkl import warnings from pathlib import Path @@ -19,8 +18,8 @@ from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey +from utils.log import build_log -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -268,19 +267,8 @@ def eres_func(x): configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_lqcal"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 1fad3d3..6eb25eb 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -2,7 +2,6 @@ import argparse import copy -import logging import pickle as pkl import re import warnings @@ -19,8 +18,8 @@ from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey +from utils.log import build_log -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -431,19 +430,8 @@ def calibrate_partition( configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_partcal"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index ac728d7..f3f634b 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -19,6 +19,7 @@ ) from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float +from utils.log import build_log log = logging.getLogger(__name__) @@ -59,19 +60,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_qc"] - if "logging" in config_dict["options"]: - 
log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index e308f5e..e642aa3 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -18,6 +18,7 @@ get_keys, ) from util.convert_np import convert_dict_np_to_float +from utils.log import build_log log = logging.getLogger(__name__) @@ -47,19 +48,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_pht_qc"] - if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) - else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + + log = build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index 018e386..4ae8843 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -7,6 +7,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from utils.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) @@ -24,19 +25,8 @@ configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"]["pars_tcm_pulser"] -if "logging" in config_dict["options"]: - log_config = config_dict["options"]["logging"] - log_config = Props.read_from(log_config) - if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - log_config["handlers"]["file"]["filename"] = args.log - logging.config.dictConfig(log_config) - log = logging.getLogger(config_dict["options"].get("logger", "prod")) -else: - if args.log is not None: - Path(args.log).parent.makedir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - log = logging.getLogger(__name__) + +log = build_log(config_dict, args.log) sto = lh5.LH5Store() log = logging.getLogger(__name__) diff --git a/scripts/util/log.py b/scripts/util/log.py new file mode 100644 index 0000000..79b97c5 --- /dev/null +++ b/scripts/util/log.py @@ -0,0 +1,28 @@ +import logging +from logging.config import dictConfig +from pathlib import Path + +from dbetto import Props + + +def build_log(config_dict, log_file=None): 
+ if "logging" in config_dict["options"]: + log_config = config_dict["options"]["logging"] + log_config = Props.read_from(log_config) + if log_file is not None: + Path(log_file).parent.mkdir(parents=True, exist_ok=True) + log_config["handlers"]["dynamic"] = { + "class": "logging.FileHandler", + "level": "DEBUG", + "formatter": "simple", + "filename": log_file, + "mode": "a", + } + dictConfig(log_config) + log = logging.getLogger(config_dict["options"].get("logger", "prod")) + else: + if log_file is not None: + Path(log_file).parent.makedir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, filename=log_file, filemode="w") + log = logging.getLogger(__name__) + return log From 72140e2b6eca848fbd909cc1e8c65943a89245ed Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 8 Jan 2025 17:04:54 +0100 Subject: [PATCH 030/101] fix svm rules --- Snakefile | 18 +++++++++--------- rules/dsp.smk | 2 -- rules/psp.smk | 2 -- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/Snakefile b/Snakefile index fd14ffb..0d8dc94 100644 --- a/Snakefile +++ b/Snakefile @@ -134,15 +134,15 @@ onsuccess: if os.path.isfile(file): os.remove(file) - # # remove filelists - # files = glob.glob(os.path.join(filelist_path(setup), "*")) - # for file in files: - # if os.path.isfile(file): - # os.remove(file) - # if os.path.exists(filelist_path(setup)): - # os.rmdir(filelist_path(setup)) - - # remove logs + # remove filelists + files = glob.glob(os.path.join(filelist_path(setup), "*")) + for file in files: + if os.path.isfile(file): + os.remove(file) + if os.path.exists(filelist_path(setup)): + os.rmdir(filelist_path(setup)) + + # remove logs files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): diff --git a/rules/dsp.smk b/rules/dsp.smk index 4d70945..66a18c8 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -237,7 +237,6 @@ rule build_svm_dsp: params: timestamp="{timestamp}", datatype="cal", - channel="{channel}", output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: @@ -253,7 +252,6 @@ rule build_svm_dsp: "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--channel {params.channel} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " "--output_file {output.dsp_pars}" diff --git a/rules/psp.smk b/rules/psp.smk index 5505f27..bde834d 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -188,7 +188,6 @@ rule build_svm_psp: params: timestamp="{timestamp}", datatype="cal", - channel="{channel}", output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: @@ -204,7 +203,6 @@ rule build_svm_psp: "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--channel {params.channel} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " "--output_file {output.dsp_pars}" From 5139f183695a2377cd8d94b3fa12e68c58060227 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 8 Jan 2025 18:45:57 +0100 Subject: [PATCH 031/101] add dbetto dependency to configs --- templates/config-nersc.json | 1 + templates/config.json | 1 + 2 files changed, 2 insertions(+) diff --git a/templates/config-nersc.json b/templates/config-nersc.json index 5d0c927..9df4fe7 100644 --- a/templates/config-nersc.json +++ b/templates/config-nersc.json @@ -74,6 +74,7 @@ "pkg_versions": { "pygama": "pygama==2.0.3", "pylegendmeta": "pylegendmeta==0.10.2", + "dbetto": "dbetto==1.0.6", "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.10.0", 
"legend-daq2lh5": "legend-daq2lh5==1.2.1", diff --git a/templates/config.json b/templates/config.json index 0d801ba..17f4bbf 100644 --- a/templates/config.json +++ b/templates/config.json @@ -76,6 +76,7 @@ "pkg_versions": { "pygama": "pygama==2.0.3", "pylegendmeta": "pylegendmeta==1.1.0", + "dbetto": "dbetto==1.0.6", "dspeed": "dspeed==1.6.1", "legend-pydataobj": "legend-pydataobj==1.9.0", "legend-daq2lh5": "legend-daq2lh5==1.2.2", From 4dea2743a895cd904f02799b7ef77b45402cfc19 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 17 Jan 2025 16:26:03 +0100 Subject: [PATCH 032/101] Fix bugs in complete_run.py --- scripts/complete_run.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/complete_run.py b/scripts/complete_run.py index fe800e8..e4c5eb3 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -131,8 +131,9 @@ def get_run(Filekey): return key_dict -def build_valid_keys(input_files, output_dir): - infiles = Path(as_ro(input_files)).glob() +def build_valid_keys(input_files_regex, output_dir): + in_regex = Path(as_ro(input_files_regex)) + infiles = in_regex.parent.glob(in_regex.name) key_dict = get_keys(infiles) for key in list(key_dict): @@ -254,9 +255,8 @@ def tdirs(tier): def fformat(tier): - return as_ro( - patterns.get_pattern_tier(snakemake.params.setup, tier, check_in_cycle=False) - ).replace(as_ro(ut.get_tier_path(snakemake.params.setup, tier)), "") + abs_path = patterns.get_pattern_tier(snakemake.params.setup, tier, check_in_cycle=False) + return str(abs_path).replace(ut.get_tier_path(snakemake.params.setup, tier), "") file_db_config |= { @@ -267,7 +267,7 @@ def fformat(tier): if snakemake.wildcards.tier != "daq": print(f"INFO: ...building FileDBs with {snakemake.threads} threads") - Path(snakemake.params.filedb_path).parent.makedirs(parents=True, exist_ok=True) + Path(snakemake.params.filedb_path).mkdir(parents=True, exist_ok=True) with (Path(snakemake.params.filedb_path) / "file_db_config.json").open("w") as f: json.dump(file_db_config, f, indent=2) From 0c4392440fec4ab3b40b807613aa3acfe94430c3 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 17 Jan 2025 16:35:46 +0100 Subject: [PATCH 033/101] Support using specialized build_raw script depending on DAQ extension --- Snakefile | 1 - rules/common.smk | 4 +-- rules/raw.smk | 65 +++++++++++++++++++++------------------- scripts/util/patterns.py | 11 ++++--- 4 files changed, 42 insertions(+), 39 deletions(-) diff --git a/Snakefile b/Snakefile index 0d8dc94..3a44ece 100644 --- a/Snakefile +++ b/Snakefile @@ -12,7 +12,6 @@ This includes: from pathlib import Path import os -import json import sys import glob from datetime import datetime diff --git a/rules/common.smk b/rules/common.smk index 6ba4654..ad1d7c2 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -114,8 +114,8 @@ def get_tier_pattern(tier): This func gets the search pattern for the relevant tier passed. 
""" if tier == "daq": - return get_pattern_unsorted_data(setup) + return get_pattern_unsorted_data(setup, extension="*") elif tier == "raw": - return get_pattern_tier_daq(setup) + return get_pattern_tier_daq(setup, extension="*") else: return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/raw.smk b/rules/raw.smk index 8239519..59054ce 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -4,36 +4,41 @@ from scripts.util.patterns import ( get_pattern_log, get_pattern_tier_raw_blind, ) +from scripts.util.utils import set_last_rule_name -rule build_raw: - """ - This rule runs build raw, it takes in a daq file and outputs a raw file - """ - input: - get_pattern_tier_daq(setup), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: ro(input), - output: - get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), - log: - get_pattern_log(setup, "tier_raw"), - group: - "tier-raw" - resources: - mem_swap=110, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_raw.py " - "--log {log} " - f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "{params.ro_input} {output}" +for daq_ext in ("orca", "fcio"): + + rule: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension=daq_ext), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_{daq_ext}.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" + + set_last_rule_name(workflow, f"build_raw_{daq_ext}") rule build_raw_blind: @@ -42,7 +47,7 @@ rule build_raw_blind: and runs only if the blinding check file is on disk. Output is just the blinded raw file. """ input: - tier_file=get_pattern_tier(setup, "raw", check_in_cycle=False).replace( + tier_file=str(get_pattern_tier(setup, "raw", check_in_cycle=False)).replace( "{datatype}", "phy" ), blind_file=get_blinding_curve_file, @@ -53,7 +58,7 @@ rule build_raw_blind: output: get_pattern_tier_raw_blind(setup), log: - get_pattern_log(setup, "tier_raw_blind").replace("{datatype}", "phy"), + str(get_pattern_log(setup, "tier_raw_blind")).replace("{datatype}", "phy"), group: "tier-raw" resources: diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index 1bfc9f7..e44aa33 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -53,23 +53,22 @@ def full_channel_pattern_with_extension(): return "{experiment}-{period}-{run}-{datatype}-{timestamp}-{channel}-{processing_step}.{ext}" -def get_pattern_unsorted_data(setup): +def get_pattern_unsorted_data(setup, extension="orca"): if sandbox_path(setup) is not None: - return ( - Path(f"{sandbox_path(setup)}") - / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" + return Path(f"{sandbox_path(setup)}") / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}." 
+ extension ) else: return None -def get_pattern_tier_daq(setup): +def get_pattern_tier_daq(setup, extension="orca"): return ( Path(f"{tier_daq_path(setup)}") / "{datatype}" / "{period}" / "{run}" - / "{experiment}-{period}-{run}-{datatype}-{timestamp}.orca" + / ("{experiment}-{period}-{run}-{datatype}-{timestamp}." + extension) ) From 8eba704089dee0d8de5dd8f260be3c9103ee1263 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 17 Jan 2025 16:36:38 +0100 Subject: [PATCH 034/101] Updates to build_raw Snakefile to support latest dataflow changes Commented broken pars catalog stuff for now --- Snakefile-build-raw | 69 +++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 98bd579..95d4a87 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -6,11 +6,10 @@ to the blinded raw data. It handles: - blinding the physics data """ -import pathlib, os, json, sys +import pathlib, os, sys from scripts.util.patterns import ( get_pattern_unsorted_data, get_pattern_tier_daq, - get_pattern_tier_raw, ) from scripts.util.utils import ( subst_vars_in_snakemake_config, @@ -20,8 +19,8 @@ from scripts.util.utils import ( filelist_path, pars_path, metadata_path, + det_status_path, ) -from scripts.util.pars_loading import pars_catalog import scripts.util as ds check_in_cycle = True @@ -36,16 +35,17 @@ configs = config_path(setup) chan_maps = chan_map_path(setup) swenv = runcmd(setup) meta = metadata_path(setup) +det_status = det_status_path(setup) basedir = workflow.basedir wildcard_constraints: - experiment="\w+", - period="p\d{2}", - run="r\d{3}", - datatype="\w{3}", - timestamp="\d{8}T\d{6}Z", + experiment=r"\w+", + period=r"p\d{2}", + run=r"r\d{3}", + datatype=r"\w{3}", + timestamp=r"\d{8}T\d{6}Z", localrules: @@ -53,25 +53,26 @@ localrules: autogen_output, -raw_par_catalog = ds.pars_key_resolve.get_par_catalog( - ["-*-*-*-cal"], - [ - get_pattern_unsorted_data(setup), - get_pattern_tier_daq(setup), - get_pattern_tier_raw(setup), - ], - {"cal": ["par_raw"]}, -) +# raw_par_catalog = ds.pars_key_resolve.get_par_catalog( +# ["-*-*-*-cal"], +# [ +# get_pattern_unsorted_data(setup), +# get_pattern_tier_daq(setup), +# get_pattern_tier(setup, "raw"), +# ], +# {"cal": ["par_raw"]}, +# ) onstart: print("Starting workflow") - raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") - if os.path.isfile(raw_par_cat_file): - os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) - pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir(parents=True, exist_ok=True) - ds.pars_key_resolve.write_to_jsonl(raw_par_catalog, raw_par_cat_file) + # raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") + # if os.path.isfile(raw_par_cat_file): + # os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) + # pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir(parents=True, exist_ok=True) + # ds.pars_key_resolve.write_to_jsonl(raw_par_catalog, raw_par_cat_file) + onsuccess: @@ -88,20 +89,29 @@ include: "rules/blinding_check.smk" rule gen_filelist: - """ - Generate file list. + """Generate file list. + + It is a checkpoint so when it is run it will update the dag passed on the + files it finds as an output. It does this by taking in the search pattern, + using this to find all the files that match this pattern, deriving the keys + from the files found and generating the list of new files needed. 
""" input: lambda wildcards: get_filelist( wildcards, setup, - get_tier_pattern(wildcards.tier), - ignore_keys_file=os.path.join(configs, "empty_keys.keylist"), - analysis_runs_file=None, + get_pattern_tier(setup, "raw", check_in_cycle=False), + ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", + analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - os.path.join(filelist_path(setup), "{label}-{tier}.filelist"), + temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), run: + if len(input) == 0: + print( + f"WARNING: No files found for the given pattern:{wildcards.label}", + "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen", + ) with open(output[0], "w") as f: for fn in input: f.write(f"{fn}\n") @@ -118,3 +128,6 @@ rule sort_data: get_pattern_tier_daq(setup), shell: "mv {input} {output}" + + +# vim: ft=snakemake From e565e59f2b596508475fb3076baa0c87e0614374 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Fri, 17 Jan 2025 18:16:02 +0100 Subject: [PATCH 035/101] extension="*" does not work as expected, needs to be fixed in some other way --- Snakefile | 14 ++++++++------ Snakefile-build-raw | 21 +++++++++------------ rules/common.smk | 12 ------------ rules/filelist_gen.smk | 8 +++++--- 4 files changed, 22 insertions(+), 33 deletions(-) diff --git a/Snakefile b/Snakefile index 3a44ece..3a66e0a 100644 --- a/Snakefile +++ b/Snakefile @@ -157,10 +157,10 @@ onsuccess: rule gen_filelist: """Generate file list. - It is a checkpoint so when it is run it will update the dag passed on the - files it finds as an output. It does this by taking in the search pattern, - using this to find all the files that match this pattern, deriving the keys - from the files found and generating the list of new files needed. + This rule is used as a "checkpoint", so when it is run it will update the + DAG based on the files it finds. It does this by taking in the search + pattern, using this to find all the files that match this pattern, deriving + the keys from the files found and generating the list of new files needed. """ input: lambda wildcards: get_filelist( @@ -173,10 +173,12 @@ rule gen_filelist: output: temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), run: + print(f"INFO: found {len(input)} files") if len(input) == 0: print( - f"WARNING: No files found for the given pattern:{wildcards.label}", - "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen", + f"WARNING: No files found for the given pattern:{wildcards.label}. " + "make sure pattern follows the format: " + "all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" ) with open(output[0], "w") as f: for fn in input: diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 95d4a87..ef05855 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -88,29 +88,26 @@ include: "rules/raw.smk" include: "rules/blinding_check.smk" +# FIXME: cannot put extension="*", otherwise it won't be possible to extract +# keys (see FileKey.get_path_from_filekey()) rule gen_filelist: - """Generate file list. - - It is a checkpoint so when it is run it will update the dag passed on the - files it finds as an output. It does this by taking in the search pattern, - using this to find all the files that match this pattern, deriving the keys - from the files found and generating the list of new files needed. 
- """ input: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_unsorted_data(setup, extension="fcio"), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), run: + print(f"INFO: found {len(input)} files") if len(input) == 0: print( - f"WARNING: No files found for the given pattern:{wildcards.label}", - "\nmake sure pattern follows the format: all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen", + f"WARNING: no DAQ files found for the given pattern: {wildcards.label}. " + "make sure patterns follows the format: " + "all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" ) with open(output[0], "w") as f: for fn in input: @@ -123,9 +120,9 @@ rule sort_data: to the sorted dirs under generated """ input: - get_pattern_unsorted_data(setup), + get_pattern_unsorted_data(setup, extension="fcio"), output: - get_pattern_tier_daq(setup), + get_pattern_tier_daq(setup, extension="fcio"), shell: "mv {input} {output}" diff --git a/rules/common.smk b/rules/common.smk index ad1d7c2..1f09470 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -107,15 +107,3 @@ def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): raise ValueError(f"Could not find name in {pars_files_overwrite}") else: return out_files - - -def get_tier_pattern(tier): - """ - This func gets the search pattern for the relevant tier passed. - """ - if tier == "daq": - return get_pattern_unsorted_data(setup, extension="*") - elif tier == "raw": - return get_pattern_tier_daq(setup, extension="*") - else: - return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index c90c570..e30b876 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -148,8 +148,9 @@ def build_filelist( analysis_runs=None, ): """ - This function builds the filelist for the given filekeys, search pattern and tier. - It will ignore any keys in the ignore_keys list and only include the keys specified in the analysis_runs dict + This function builds the filelist for the given filekeys, search pattern + and tier. It will ignore any keys in the ignore_keys list and only include + the keys specified in the analysis_runs dict. 
""" fn_pattern = get_pattern(setup, tier) @@ -220,7 +221,8 @@ def get_filelist( wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): file_selection = wildcards.label.split("-", 1)[0] - keypart = f'-{wildcards.label.split("-",1)[1]}' # remove the file selection from the keypart + # remove the file selection from the keypart + keypart = f'-{wildcards.label.split("-",1)[1]}' analysis_runs, ignore_keys = get_analysis_runs( ignore_keys_file, analysis_runs_file, file_selection ) From 0be642ff57645491eb2d1724e3ddebb9a562d034 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Sat, 18 Jan 2025 17:52:53 +0100 Subject: [PATCH 036/101] Renaming, JIT compile daq2lh5 onstart --- Snakefile-build-raw | 16 ++-- rules/common.smk | 4 - scripts/build_raw_fcio.py | 89 +++++++++++++++++++++ scripts/{build_raw.py => build_raw_orca.py} | 0 scripts/util/patterns.py | 2 +- 5 files changed, 98 insertions(+), 13 deletions(-) create mode 100644 scripts/build_raw_fcio.py rename scripts/{build_raw.py => build_raw_orca.py} (100%) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index ef05855..fd9e795 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -7,10 +7,7 @@ to the blinded raw data. It handles: """ import pathlib, os, sys -from scripts.util.patterns import ( - get_pattern_unsorted_data, - get_pattern_tier_daq, -) +from scripts.util import patterns as patt from scripts.util.utils import ( subst_vars_in_snakemake_config, runcmd, @@ -65,7 +62,10 @@ localrules: onstart: - print("Starting workflow") + print("INFO: starting workflow") + + # Make sure some packages are initialized before we begin to avoid race conditions + shell('{swenv} python3 -B -c "import daq2lh5 "') # raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") # if os.path.isfile(raw_par_cat_file): @@ -95,7 +95,7 @@ rule gen_filelist: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_unsorted_data(setup, extension="fcio"), + patt.get_pattern_tier_daq(setup, extension="fcio"), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), @@ -120,9 +120,9 @@ rule sort_data: to the sorted dirs under generated """ input: - get_pattern_unsorted_data(setup, extension="fcio"), + patt.get_pattern_tier_daq_unsorted(setup, extension="fcio"), output: - get_pattern_tier_daq(setup, extension="fcio"), + patt.get_pattern_tier_daq(setup, extension="fcio"), shell: "mv {input} {output}" diff --git a/rules/common.smk b/rules/common.smk index 1f09470..a259601 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -6,10 +6,6 @@ from pathlib import Path from scripts.util.patterns import ( par_overwrite_path, get_pars_path, - get_pattern_unsorted_data, - get_pattern_tier_daq, - get_pattern_tier, - get_pattern_plts_tmp_channel, ) from scripts.util import ProcessingFileKey from scripts.util.catalog import Catalog diff --git a/scripts/build_raw_fcio.py b/scripts/build_raw_fcio.py new file mode 100644 index 0000000..7f17329 --- /dev/null +++ b/scripts/build_raw_fcio.py @@ -0,0 +1,89 @@ +import argparse +import logging +from pathlib import Path + +import numpy as np +from daq2lh5 import build_raw +from legendmeta import TextDB +from legendmeta.catalog import Props +from utils.log import build_log + +argparser = argparse.ArgumentParser() +argparser.add_argument("input", help="input file", type=str) +argparser.add_argument("output", help="output file", type=str) +argparser.add_argument("--datatype", help="Datatype", type=str, required=True) 
+argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) +argparser.add_argument("--configs", help="config file", type=str) +argparser.add_argument("--chan_maps", help="chan map", type=str) +argparser.add_argument("--log", help="log file", type=str) +args = argparser.parse_args() + +Path(args.log).parent.makedir(parents=True, exist_ok=True) +logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + +Path(args.output).parent.mkdir(parents=True, exist_ok=True) + +configs = TextDB(args.configs, lazy=True) +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] + +log = build_log(config_dict, args.log) + +channel_dict = config_dict["inputs"] +settings = Props.read_from(channel_dict["settings"]) +channel_dict = channel_dict["out_spec"] +all_config = Props.read_from(channel_dict["gen_config"]) + +chmap = TextDB(args.chan_maps, lazy=True).channelmaps.on(args.timestamp).group("system") + +if "geds_config" in list(channel_dict): + ged_config = Props.read_from(channel_dict["geds_config"]) + + ged_channels = list( + chmap.geds.map("daq.rawid") + ) + + ged_config[next(iter(ged_config))]["geds"]["key_list"] = sorted(ged_channels) + Props.add_to(all_config, ged_config) + +if "spms_config" in list(channel_dict): + spm_config = Props.read_from(channel_dict["spms_config"]) + + spm_channels = list( + chmap.spms.map("daq.rawid") + ) + + spm_config[next(iter(spm_config))]["spms"]["key_list"] = sorted(spm_channels) + Props.add_to(all_config, spm_config) + +if "auxs_config" in list(channel_dict): + aux_config = Props.read_from(channel_dict["auxs_config"]) + aux_channels = list( + chmap.auxs.map("daq.rawid") + ) + aux_channels += list( + chmap.puls.map("daq.rawid") + ) + aux_channels += list( + chmap.bsln.map("daq.rawid") + ) + top_key = next(iter(aux_config)) + aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted(aux_channels) + Props.add_to(all_config, aux_config) + +if "muon_config" in list(channel_dict): + muon_config = Props.read_from(channel_dict["muon_config"]) + muon_channels = list( + chmap.muon.map("daq.rawid") + ) + top_key = next(iter(muon_config)) + muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted(muon_channels) + Props.add_to(all_config, muon_config) + +rng = np.random.default_rng() +rand_num = f"{rng.integers(0,99999):05d}" +temp_output = f"{args.output}.{rand_num}" + +build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) + +# rename the temp file +Path(temp_output).rename(args.output) diff --git a/scripts/build_raw.py b/scripts/build_raw_orca.py similarity index 100% rename from scripts/build_raw.py rename to scripts/build_raw_orca.py diff --git a/scripts/util/patterns.py b/scripts/util/patterns.py index e44aa33..28d27db 100644 --- a/scripts/util/patterns.py +++ b/scripts/util/patterns.py @@ -53,7 +53,7 @@ def full_channel_pattern_with_extension(): return "{experiment}-{period}-{run}-{datatype}-{timestamp}-{channel}-{processing_step}.{ext}" -def get_pattern_unsorted_data(setup, extension="orca"): +def get_pattern_tier_daq_unsorted(setup, extension="orca"): if sandbox_path(setup) is not None: return Path(f"{sandbox_path(setup)}") / ( "{experiment}-{period}-{run}-{datatype}-{timestamp}." 
+ extension From 4dcd0d2ee04d954f4be68215282b686660aea770 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Mon, 20 Jan 2025 12:40:49 +0100 Subject: [PATCH 037/101] Several fixes to build_raw.py scripts --- scripts/build_raw_blind.py | 2 +- scripts/build_raw_fcio.py | 78 ++++++++++++++------------------------ scripts/build_raw_orca.py | 8 ++-- scripts/util/log.py | 2 +- 4 files changed, 34 insertions(+), 56 deletions(-) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 0957c7a..0b036dd 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -18,7 +18,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) diff --git a/scripts/build_raw_fcio.py b/scripts/build_raw_fcio.py index 7f17329..b4d2e22 100644 --- a/scripts/build_raw_fcio.py +++ b/scripts/build_raw_fcio.py @@ -1,12 +1,12 @@ import argparse -import logging +from copy import deepcopy from pathlib import Path import numpy as np from daq2lh5 import build_raw -from legendmeta import TextDB -from legendmeta.catalog import Props -from utils.log import build_log +from dbetto import TextDB +from dbetto.catalog import Props +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) @@ -18,66 +18,44 @@ argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() -Path(args.log).parent.makedir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - Path(args.output).parent.mkdir(parents=True, exist_ok=True) -configs = TextDB(args.configs, lazy=True) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] +config_dict = ( + TextDB(args.configs, lazy=True) + .on(args.timestamp, system=args.datatype) + .snakemake_rules.tier_raw_fcio +) log = build_log(config_dict, args.log) -channel_dict = config_dict["inputs"] -settings = Props.read_from(channel_dict["settings"]) -channel_dict = channel_dict["out_spec"] -all_config = Props.read_from(channel_dict["gen_config"]) +channel_dict = config_dict.inputs +settings = Props.read_from(channel_dict.settings) +channel_dict = channel_dict.out_spec +all_config = Props.read_from(channel_dict.gen_config) chmap = TextDB(args.chan_maps, lazy=True).channelmaps.on(args.timestamp).group("system") -if "geds_config" in list(channel_dict): - ged_config = Props.read_from(channel_dict["geds_config"]) - - ged_channels = list( - chmap.geds.map("daq.rawid") - ) +if "geds_config" in channel_dict: + raise NotImplementedError() - ged_config[next(iter(ged_config))]["geds"]["key_list"] = sorted(ged_channels) - Props.add_to(all_config, ged_config) +if "spms_config" in channel_dict: + spm_config = Props.read_from(channel_dict.spms_config) + spm_channels = chmap.spms.map("daq.rawid") -if "spms_config" in list(channel_dict): - spm_config = Props.read_from(channel_dict["spms_config"]) + for rawid, chinfo in spm_channels.items(): + cfg_block = deepcopy(spm_config["FCEventDecoder"]["__output_table_name__"]) + cfg_block["key_list"] = [chinfo.daq.fc_channel] + spm_config["FCEventDecoder"][f"ch{rawid:07d}/raw"] = cfg_block - spm_channels = list( - chmap.spms.map("daq.rawid") - ) + spm_config["FCEventDecoder"].pop("__output_table_name__") - spm_config[next(iter(spm_config))]["spms"]["key_list"] = 
sorted(spm_channels) Props.add_to(all_config, spm_config) -if "auxs_config" in list(channel_dict): - aux_config = Props.read_from(channel_dict["auxs_config"]) - aux_channels = list( - chmap.auxs.map("daq.rawid") - ) - aux_channels += list( - chmap.puls.map("daq.rawid") - ) - aux_channels += list( - chmap.bsln.map("daq.rawid") - ) - top_key = next(iter(aux_config)) - aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted(aux_channels) - Props.add_to(all_config, aux_config) - -if "muon_config" in list(channel_dict): - muon_config = Props.read_from(channel_dict["muon_config"]) - muon_channels = list( - chmap.muon.map("daq.rawid") - ) - top_key = next(iter(muon_config)) - muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted(muon_channels) - Props.add_to(all_config, muon_config) +if "auxs_config" in channel_dict: + raise NotImplementedError() + +if "muon_config" in channel_dict: + raise NotImplementedError() rng = np.random.default_rng() rand_num = f"{rng.integers(0,99999):05d}" diff --git a/scripts/build_raw_orca.py b/scripts/build_raw_orca.py index 7e1dd1b..b307b01 100644 --- a/scripts/build_raw_orca.py +++ b/scripts/build_raw_orca.py @@ -4,8 +4,8 @@ import numpy as np from daq2lh5 import build_raw -from legendmeta import TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props from utils.log import build_log argparser = argparse.ArgumentParser() @@ -15,10 +15,10 @@ argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--configs", help="config file", type=str) argparser.add_argument("--chan_maps", help="chan map", type=str) -argparser.add_argument("--log", help="log file", type=str) +argparser.add_argument("--log", help="log file") args = argparser.parse_args() -Path(args.log).parent.makedir(parents=True, exist_ok=True) +Path(args.log).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") Path(args.output).parent.mkdir(parents=True, exist_ok=True) diff --git a/scripts/util/log.py b/scripts/util/log.py index 79b97c5..9a9b191 100644 --- a/scripts/util/log.py +++ b/scripts/util/log.py @@ -22,7 +22,7 @@ def build_log(config_dict, log_file=None): log = logging.getLogger(config_dict["options"].get("logger", "prod")) else: if log_file is not None: - Path(log_file).parent.makedir(parents=True, exist_ok=True) + Path(log_file).parent.mkdir(parents=True, exist_ok=True) logging.basicConfig(level=logging.INFO, filename=log_file, filemode="w") log = logging.getLogger(__name__) return log From 3c2a166554630057cec669b6434ac54bc63b48dc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 13:02:35 +0100 Subject: [PATCH 038/101] allow filelist globbing for daq fcio/orca files --- Snakefile | 2 +- Snakefile-build-raw | 4 +--- rules/common.smk | 12 ++++++++++++ rules/filelist_gen.smk | 14 ++++++++++---- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/Snakefile b/Snakefile index 3a66e0a..945b4dd 100644 --- a/Snakefile +++ b/Snakefile @@ -166,7 +166,7 @@ rule gen_filelist: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), diff --git a/Snakefile-build-raw b/Snakefile-build-raw index ef05855..2ace6f7 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -88,14 +88,12 @@ 
include: "rules/raw.smk" include: "rules/blinding_check.smk" -# FIXME: cannot put extension="*", otherwise it won't be possible to extract -# keys (see FileKey.get_path_from_filekey()) rule gen_filelist: input: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_unsorted_data(setup, extension="fcio"), + get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), diff --git a/rules/common.smk b/rules/common.smk index 1f09470..da79753 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -107,3 +107,15 @@ def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): raise ValueError(f"Could not find name in {pars_files_overwrite}") else: return out_files + + +def get_search_pattern(tier): + """ + This func gets the search pattern for the relevant tier passed. + """ + if tier == "daq": + return get_pattern_unsorted_data(setup, extension="*") + elif tier == "raw": + return get_pattern_tier_daq(setup, extension="*") + else: + return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index e30b876..7975fa8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -3,7 +3,11 @@ import json, yaml from pathlib import Path from scripts.util.FileKey import FileKey, run_grouper -from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind +from scripts.util.patterns import ( + get_pattern_tier, + get_pattern_tier_raw_blind, + get_pattern_tier_daq, +) concat_datatypes = ["phy"] concat_tiers = ["skm", "pet_concat", "evt_concat"] @@ -114,6 +118,8 @@ def get_pattern(setup, tier): fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) elif tier == "evt_concat": fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) + elif tier == "daq": + fn_pattern = get_pattern_tier_daq(setup, extension="{ext}") else: fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) return fn_pattern @@ -158,13 +164,13 @@ def build_filelist( ignore_keys = [] if analysis_runs is None: analysis_runs = {} - phy_filenames = [] other_filenames = [] for key in filekeys: - fn_glob_pattern = key.get_path_from_filekey(search_pattern)[0] + if Path(search_pattern).suffix == ".*": + search_pattern = Path(str(search_pattern).replace(".*", ".{ext}")) + fn_glob_pattern = key.get_path_from_filekey(search_pattern, ext="*")[0] files = glob.glob(fn_glob_pattern) - for f in files: _key = FileKey.get_filekey_from_pattern(f, search_pattern) if _key.name in ignore_keys: From 1dcd0274c0c288cece654dc47b62ae671526a3cc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 16:46:55 +0100 Subject: [PATCH 039/101] have par catalog build support multiple file extensions, split out build raw rule into orca and fcio --- Snakefile-build-raw | 36 +++++------ rules/filelist_gen.smk | 2 +- rules/raw.smk | 98 ++++++++++++++++++++--------- scripts/util/create_pars_keylist.py | 10 ++- 4 files changed, 92 insertions(+), 54 deletions(-) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index e6c7c62..2635a5d 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -50,15 +50,11 @@ localrules: autogen_output, -# raw_par_catalog = ds.pars_key_resolve.get_par_catalog( -# ["-*-*-*-cal"], -# [ -# get_pattern_unsorted_data(setup), -# get_pattern_tier_daq(setup), -# get_pattern_tier(setup, "raw"), -# ], -# {"cal": ["par_raw"]}, -# ) +include: "rules/common.smk" +include: "rules/filelist_gen.smk" +include: 
"rules/main.smk" +include: "rules/raw.smk" +include: "rules/blinding_check.smk" onstart: @@ -67,12 +63,17 @@ onstart: # Make sure some packages are initialized before we begin to avoid race conditions shell('{swenv} python3 -B -c "import daq2lh5 "') - # raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") - # if os.path.isfile(raw_par_cat_file): - # os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) - # pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir(parents=True, exist_ok=True) - # ds.pars_key_resolve.write_to_jsonl(raw_par_catalog, raw_par_cat_file) + raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") + if os.path.isfile(raw_par_cat_file): + os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) + try: + pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir( + parents=True, exist_ok=True + ) + ds.pars_key_resolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) + except NameError: + pass onsuccess: @@ -81,13 +82,6 @@ onsuccess: shell(f"rm {filelist_path(setup)}/* || true") -include: "rules/common.smk" -include: "rules/filelist_gen.smk" -include: "rules/main.smk" -include: "rules/raw.smk" -include: "rules/blinding_check.smk" - - rule gen_filelist: input: lambda wildcards: get_filelist( diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 7975fa8..24a94f5 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -168,7 +168,7 @@ def build_filelist( other_filenames = [] for key in filekeys: if Path(search_pattern).suffix == ".*": - search_pattern = Path(str(search_pattern).replace(".*", ".{ext}")) + search_pattern = Path(search_pattern).with_suffix(".{ext}") fn_glob_pattern = key.get_path_from_filekey(search_pattern, ext="*")[0] files = glob.glob(fn_glob_pattern) for f in files: diff --git a/rules/raw.smk b/rules/raw.smk index 59054ce..fd95467 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -1,44 +1,82 @@ from scripts.util.patterns import ( + get_pattern_tier_daq_unsorted, get_pattern_tier_daq, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, ) from scripts.util.utils import set_last_rule_name +from scripts.util.create_pars_keylist import ParsKeyResolve + +raw_par_catalog = ParsKeyResolve.get_par_catalog( + ["-*-*-*-cal"], + [ + get_pattern_tier_daq_unsorted(setup, extension="*"), + get_pattern_tier_daq(setup, extension="*"), + get_pattern_tier(setup, "raw", check_in_cycle=False), + ], + {"cal": ["par_raw"]}, +) -for daq_ext in ("orca", "fcio"): +rule build_raw_orca: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension="orca"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_orca.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" - rule: - """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file - """ - input: - get_pattern_tier_daq(setup, extension=daq_ext), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: ro(input), - output: - get_pattern_tier(setup, "raw", 
check_in_cycle=check_in_cycle), - log: - get_pattern_log(setup, "tier_raw"), - group: - "tier-raw" - resources: - mem_swap=110, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}" + f"/../scripts/build_raw_{daq_ext}.py " - "--log {log} " - f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "{params.ro_input} {output}" - set_last_rule_name(workflow, f"build_raw_{daq_ext}") +rule build_raw_fcio: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension="fcio"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_fcio.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" rule build_raw_blind: diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index c3e1f22..a82ef0c 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -99,12 +99,16 @@ def match_all_entries(entrylist, name_dict): @staticmethod def get_keys(keypart, search_pattern): d = FileKey.parse_keypart(keypart) + if Path(search_pattern).suffix == ".*": + search_pattern = Path(search_pattern).with_suffix(".{ext}") + wildcard_dict = dict(ext="*", **d._asdict()) + else: + wildcard_dict = d._asdict() try: tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(str(search_pattern))) - except AttributeError: tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) - fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] + fn_glob_pattern = smk.io.expand(search_pattern, **wildcard_dict)[0] p = Path(fn_glob_pattern) parts = p.parts[p.is_absolute() :] files = Path(p.root).glob(str(Path(*parts))) @@ -113,6 +117,8 @@ def get_keys(keypart, search_pattern): m = tier_pattern_rx.match(str(f)) if m is not None: d = m.groupdict() + if "ext" in d: + d.pop("ext") key = FileKey(**d) keys.append(key) return keys From 0438539594fae88597baf3edea099be3b293829a Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 17:50:40 +0100 Subject: [PATCH 040/101] fix par catalog write --- Snakefile-build-raw | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 2635a5d..7a4779f 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -19,6 +19,7 @@ from scripts.util.utils import ( det_status_path, ) import scripts.util as ds +from scripts.util.create_pars_keylist import ParsKeyResolve check_in_cycle = True @@ -71,7 +72,7 @@ onstart: pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir( parents=True, exist_ok=True ) - ds.pars_key_resolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) + ParsKeyResolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) except NameError: pass From 25a6183e9416437ff7617d7403f1749be9810ea1 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 18:28:13 +0100 Subject: [PATCH 041/101] fix daq filelist --- rules/common.smk | 5 ++++- rules/filelist_gen.smk | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/rules/common.smk b/rules/common.smk 
index 288d06c..5625c79 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -4,6 +4,9 @@ Helper functions for running data production from pathlib import Path from scripts.util.patterns import ( + get_pattern_tier_daq_unsorted, + get_pattern_tier_daq, + get_pattern_tier, par_overwrite_path, get_pars_path, ) @@ -110,7 +113,7 @@ def get_search_pattern(tier): This func gets the search pattern for the relevant tier passed. """ if tier == "daq": - return get_pattern_unsorted_data(setup, extension="*") + return get_pattern_tier_daq_unsorted(setup, extension="*") elif tier == "raw": return get_pattern_tier_daq(setup, extension="*") else: diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 24a94f5..b3255f8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -184,6 +184,10 @@ def build_filelist( filename = FileKey.get_path_from_filekey( _key, get_pattern_tier(setup, "pet", check_in_cycle=False) ) + elif tier == "daq": + filename = FileKey.get_path_from_filekey( + _key, fn_pattern.with_suffix(Path(f).suffix) + ) else: filename = FileKey.get_path_from_filekey(_key, fn_pattern) From 325c92039d69c21607a672e3b11c01cc589aa4cd Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 13:02:35 +0100 Subject: [PATCH 042/101] allow filelist globbing for daq fcio/orca files --- Snakefile | 2 +- Snakefile-build-raw | 4 +--- rules/common.smk | 12 ++++++++++++ rules/filelist_gen.smk | 14 ++++++++++---- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/Snakefile b/Snakefile index 3a66e0a..945b4dd 100644 --- a/Snakefile +++ b/Snakefile @@ -166,7 +166,7 @@ rule gen_filelist: lambda wildcards: get_filelist( wildcards, setup, - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), diff --git a/Snakefile-build-raw b/Snakefile-build-raw index fd9e795..e6c7c62 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -88,14 +88,12 @@ include: "rules/raw.smk" include: "rules/blinding_check.smk" -# FIXME: cannot put extension="*", otherwise it won't be possible to extract -# keys (see FileKey.get_path_from_filekey()) rule gen_filelist: input: lambda wildcards: get_filelist( wildcards, setup, - patt.get_pattern_tier_daq(setup, extension="fcio"), + get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), diff --git a/rules/common.smk b/rules/common.smk index a259601..288d06c 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -103,3 +103,15 @@ def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): raise ValueError(f"Could not find name in {pars_files_overwrite}") else: return out_files + + +def get_search_pattern(tier): + """ + This func gets the search pattern for the relevant tier passed. 
+ """ + if tier == "daq": + return get_pattern_unsorted_data(setup, extension="*") + elif tier == "raw": + return get_pattern_tier_daq(setup, extension="*") + else: + return get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index e30b876..7975fa8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -3,7 +3,11 @@ import json, yaml from pathlib import Path from scripts.util.FileKey import FileKey, run_grouper -from scripts.util.patterns import get_pattern_tier, get_pattern_tier_raw_blind +from scripts.util.patterns import ( + get_pattern_tier, + get_pattern_tier_raw_blind, + get_pattern_tier_daq, +) concat_datatypes = ["phy"] concat_tiers = ["skm", "pet_concat", "evt_concat"] @@ -114,6 +118,8 @@ def get_pattern(setup, tier): fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) elif tier == "evt_concat": fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) + elif tier == "daq": + fn_pattern = get_pattern_tier_daq(setup, extension="{ext}") else: fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) return fn_pattern @@ -158,13 +164,13 @@ def build_filelist( ignore_keys = [] if analysis_runs is None: analysis_runs = {} - phy_filenames = [] other_filenames = [] for key in filekeys: - fn_glob_pattern = key.get_path_from_filekey(search_pattern)[0] + if Path(search_pattern).suffix == ".*": + search_pattern = Path(str(search_pattern).replace(".*", ".{ext}")) + fn_glob_pattern = key.get_path_from_filekey(search_pattern, ext="*")[0] files = glob.glob(fn_glob_pattern) - for f in files: _key = FileKey.get_filekey_from_pattern(f, search_pattern) if _key.name in ignore_keys: From 8197a3f94b08f5c3a95a1fd61abe12f0b1f666c2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 16:46:55 +0100 Subject: [PATCH 043/101] have par catalog build support multiple file extensions, split out build raw rule into orca and fcio --- Snakefile-build-raw | 37 +++++------ rules/common.smk | 5 +- rules/filelist_gen.smk | 6 +- rules/raw.smk | 98 ++++++++++++++++++++--------- scripts/util/create_pars_keylist.py | 10 ++- 5 files changed, 101 insertions(+), 55 deletions(-) diff --git a/Snakefile-build-raw b/Snakefile-build-raw index e6c7c62..7a4779f 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -19,6 +19,7 @@ from scripts.util.utils import ( det_status_path, ) import scripts.util as ds +from scripts.util.create_pars_keylist import ParsKeyResolve check_in_cycle = True @@ -50,15 +51,11 @@ localrules: autogen_output, -# raw_par_catalog = ds.pars_key_resolve.get_par_catalog( -# ["-*-*-*-cal"], -# [ -# get_pattern_unsorted_data(setup), -# get_pattern_tier_daq(setup), -# get_pattern_tier(setup, "raw"), -# ], -# {"cal": ["par_raw"]}, -# ) +include: "rules/common.smk" +include: "rules/filelist_gen.smk" +include: "rules/main.smk" +include: "rules/raw.smk" +include: "rules/blinding_check.smk" onstart: @@ -67,12 +64,17 @@ onstart: # Make sure some packages are initialized before we begin to avoid race conditions shell('{swenv} python3 -B -c "import daq2lh5 "') - # raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") - # if os.path.isfile(raw_par_cat_file): - # os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) - # pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir(parents=True, exist_ok=True) - # ds.pars_key_resolve.write_to_jsonl(raw_par_catalog, raw_par_cat_file) + raw_par_cat_file = os.path.join(pars_path(setup), "raw", "validity.jsonl") + if 
os.path.isfile(raw_par_cat_file): + os.remove(os.path.join(pars_path(setup), "raw", "validity.jsonl")) + try: + pathlib.Path(os.path.dirname(raw_par_cat_file)).mkdir( + parents=True, exist_ok=True + ) + ParsKeyResolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) + except NameError: + pass onsuccess: @@ -81,13 +83,6 @@ onsuccess: shell(f"rm {filelist_path(setup)}/* || true") -include: "rules/common.smk" -include: "rules/filelist_gen.smk" -include: "rules/main.smk" -include: "rules/raw.smk" -include: "rules/blinding_check.smk" - - rule gen_filelist: input: lambda wildcards: get_filelist( diff --git a/rules/common.smk b/rules/common.smk index 288d06c..5625c79 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -4,6 +4,9 @@ Helper functions for running data production from pathlib import Path from scripts.util.patterns import ( + get_pattern_tier_daq_unsorted, + get_pattern_tier_daq, + get_pattern_tier, par_overwrite_path, get_pars_path, ) @@ -110,7 +113,7 @@ def get_search_pattern(tier): This func gets the search pattern for the relevant tier passed. """ if tier == "daq": - return get_pattern_unsorted_data(setup, extension="*") + return get_pattern_tier_daq_unsorted(setup, extension="*") elif tier == "raw": return get_pattern_tier_daq(setup, extension="*") else: diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 7975fa8..b3255f8 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -168,7 +168,7 @@ def build_filelist( other_filenames = [] for key in filekeys: if Path(search_pattern).suffix == ".*": - search_pattern = Path(str(search_pattern).replace(".*", ".{ext}")) + search_pattern = Path(search_pattern).with_suffix(".{ext}") fn_glob_pattern = key.get_path_from_filekey(search_pattern, ext="*")[0] files = glob.glob(fn_glob_pattern) for f in files: @@ -184,6 +184,10 @@ def build_filelist( filename = FileKey.get_path_from_filekey( _key, get_pattern_tier(setup, "pet", check_in_cycle=False) ) + elif tier == "daq": + filename = FileKey.get_path_from_filekey( + _key, fn_pattern.with_suffix(Path(f).suffix) + ) else: filename = FileKey.get_path_from_filekey(_key, fn_pattern) diff --git a/rules/raw.smk b/rules/raw.smk index 59054ce..fd95467 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -1,44 +1,82 @@ from scripts.util.patterns import ( + get_pattern_tier_daq_unsorted, get_pattern_tier_daq, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, ) from scripts.util.utils import set_last_rule_name +from scripts.util.create_pars_keylist import ParsKeyResolve + +raw_par_catalog = ParsKeyResolve.get_par_catalog( + ["-*-*-*-cal"], + [ + get_pattern_tier_daq_unsorted(setup, extension="*"), + get_pattern_tier_daq(setup, extension="*"), + get_pattern_tier(setup, "raw", check_in_cycle=False), + ], + {"cal": ["par_raw"]}, +) -for daq_ext in ("orca", "fcio"): +rule build_raw_orca: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension="orca"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_orca.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} 
{output}" - rule: - """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file - """ - input: - get_pattern_tier_daq(setup, extension=daq_ext), - params: - timestamp="{timestamp}", - datatype="{datatype}", - ro_input=lambda _, input: ro(input), - output: - get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), - log: - get_pattern_log(setup, "tier_raw"), - group: - "tier-raw" - resources: - mem_swap=110, - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}" + f"/../scripts/build_raw_{daq_ext}.py " - "--log {log} " - f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "{params.ro_input} {output}" - set_last_rule_name(workflow, f"build_raw_{daq_ext}") +rule build_raw_fcio: + """ + This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + """ + input: + get_pattern_tier_daq(setup, extension="fcio"), + params: + timestamp="{timestamp}", + datatype="{datatype}", + ro_input=lambda _, input: ro(input), + output: + get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + log: + get_pattern_log(setup, "tier_raw"), + group: + "tier-raw" + resources: + mem_swap=110, + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}" + f"/../scripts/build_raw_fcio.py " + "--log {log} " + f"--configs {ro(configs)} " + f"--chan_maps {ro(chan_maps)} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "{params.ro_input} {output}" rule build_raw_blind: diff --git a/scripts/util/create_pars_keylist.py b/scripts/util/create_pars_keylist.py index c3e1f22..a82ef0c 100644 --- a/scripts/util/create_pars_keylist.py +++ b/scripts/util/create_pars_keylist.py @@ -99,12 +99,16 @@ def match_all_entries(entrylist, name_dict): @staticmethod def get_keys(keypart, search_pattern): d = FileKey.parse_keypart(keypart) + if Path(search_pattern).suffix == ".*": + search_pattern = Path(search_pattern).with_suffix(".{ext}") + wildcard_dict = dict(ext="*", **d._asdict()) + else: + wildcard_dict = d._asdict() try: tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(str(search_pattern))) - except AttributeError: tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) - fn_glob_pattern = smk.io.expand(search_pattern, **d._asdict())[0] + fn_glob_pattern = smk.io.expand(search_pattern, **wildcard_dict)[0] p = Path(fn_glob_pattern) parts = p.parts[p.is_absolute() :] files = Path(p.root).glob(str(Path(*parts))) @@ -113,6 +117,8 @@ def get_keys(keypart, search_pattern): m = tier_pattern_rx.match(str(f)) if m is not None: d = m.groupdict() + if "ext" in d: + d.pop("ext") key = FileKey(**d) keys.append(key) return keys From 48b326dbd4eadbd0c8334320d0af4a27fbadfd7f Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Mon, 20 Jan 2025 19:10:24 +0100 Subject: [PATCH 044/101] A lot of fixes in complete_run.py --- rules/main.smk | 2 +- rules/raw.smk | 4 +-- scripts/build_fdb.py | 2 +- scripts/check_blinding.py | 2 +- scripts/complete_run.py | 51 +++++++++++++++++++++------------------ scripts/util/__init__.py | 16 ++++++------ scripts/util/utils.py | 4 +-- 7 files changed, 42 insertions(+), 39 deletions(-) diff --git a/rules/main.smk b/rules/main.smk index 153fab4..be671c0 100644 --- a/rules/main.smk +++ b/rules/main.smk @@ -48,6 +48,6 @@ rule autogen_output: filedb_path=os.path.join(pars_path(setup), "filedb"), setup=lambda wildcards: setup, basedir=basedir, - threads: workflow.cores + threads: min(workflow.cores, 64) script: "../scripts/complete_run.py" diff 
--git a/rules/raw.smk b/rules/raw.smk index fd95467..411b23f 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -21,7 +21,7 @@ raw_par_catalog = ParsKeyResolve.get_par_catalog( rule build_raw_orca: """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + This rule runs build_raw(), it takes in a file.fcio and outputs a raw file """ input: get_pattern_tier_daq(setup, extension="orca"), @@ -51,7 +51,7 @@ rule build_raw_orca: rule build_raw_fcio: """ - This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file + This rule runs build_raw(), it takes in a file.fcio and outputs a raw file """ input: get_pattern_tier_daq(setup, extension="fcio"), diff --git a/scripts/build_fdb.py b/scripts/build_fdb.py index b9c127b..f628341 100644 --- a/scripts/build_fdb.py +++ b/scripts/build_fdb.py @@ -3,7 +3,7 @@ from pathlib import Path import numpy as np -from legendmeta.catalog import Props +from dbetto.catalog import Props from lgdo import lh5 from pygama.flow.file_db import FileDB diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index 44261a5..4298c6e 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -84,7 +84,7 @@ # check for peaks within +- 5keV of 2614 and 583 to ensure blinding still # valid and if so create file else raise error. if detector is in ac mode it # will always pass this check -if np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) or det_status is False: +if (np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5)) or det_status is False: Path(args.output).parent.mkdir(parents=True, exist_ok=True) Props.write_to(args.output, {}) else: diff --git a/scripts/complete_run.py b/scripts/complete_run.py index e4c5eb3..03cfd51 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -3,6 +3,7 @@ import datetime import json import os +import subprocess import time from pathlib import Path @@ -157,12 +158,12 @@ def find_gen_runs(gen_tier_path): # first look for non-concat tiers paths = gen_tier_path.glob("*/*/*/*") # use the directories to build a datatype/period/run string - runs = {"/".join(p.name.split("/")[-3:]) for p in paths} + runs = {"/".join(str(p).split("/")[-3:]) for p in paths} # then look for concat tiers (use filenames now) paths_concat = gen_tier_path.glob("*/*/*.lh5") # use the directories to build a datatype/period/run string - runs_concat = {"/".join([p.name.split("-")[3]] + p.name.split("-")[1:3]) for p in paths_concat} + runs_concat = {"/".join([str(p).split("-")[3]] + str(p).split("-")[1:3]) for p in paths_concat} return runs | runs_concat @@ -188,30 +189,32 @@ def build_file_dbs(gen_tier_path, outdir): logfile = Path(ut.tmp_log_path(snakemake.params.setup)) / outfile.with_suffix(".log").name print(f"INFO: ......building {outfile}") - cmdline = ut.runcmd(snakemake.params.setup, aslist=True) - prodenv = as_ro(os.getenv("PRODENV")) - cmdline += [f"--env=PRODENV={prodenv}"] + cmdline = [ + *ut.runcmd(snakemake.params.setup, aslist=True), + "--", + "python3", + "-B", + f"{snakemake.params.basedir}/scripts/build_fdb.py", + "--scan-path", + spec, + "--output", + str(outfile), + "--config", + str(outdir / "file_db_config.json"), + "--log", + str(logfile), + ] + + if speck[0] == "phy": + cmdline += ["--assume-nonsparse"] + + print(cmdline) + print(" ".join(cmdline)) + + cmdenv = {} # TODO: forward stdout to log file - processes.add( - subprocess.Popen( - [ - *cmdline, - "python3", - "-B", - f"{snakemake.params.basedir}/scripts/build_fdb.py", - "--scan-path", 
- spec, - "--output", - str(outfile), - "--config", - str(outdir / "file_db_config.json"), - "--log", - str(logfile), - "--assume-nonsparse" if speck[0] == "phy" else "", - ], - ) - ) + processes.add(subprocess.Popen(cmdline)) if len(processes) >= snakemake.threads: os.wait() diff --git a/scripts/util/__init__.py b/scripts/util/__init__.py index caa4dd2..d103033 100644 --- a/scripts/util/__init__.py +++ b/scripts/util/__init__.py @@ -12,18 +12,18 @@ ) __all__ = [ - "Props", - "PropsStream", - "Catalog", - "ParsKeyResolve", "CalGrouping", - "FileKey", - "ProcessingFileKey", + "Catalog", "ChannelProcKey", + "FileKey", "ParsCatalog", - "unix_time", + "ParsKeyResolve", + "ProcessingFileKey", + "Props", + "PropsStream", "runcmd", - "subst_vars_impl", "subst_vars", + "subst_vars_impl", "subst_vars_in_snakemake_config", + "unix_time", ] diff --git a/scripts/util/utils.py b/scripts/util/utils.py index 9d64b06..0b45a81 100644 --- a/scripts/util/utils.py +++ b/scripts/util/utils.py @@ -110,9 +110,9 @@ def filelist_path(setup): def runcmd(setup, aslist=False): cmdline = shlex.split(setup["execenv"]["cmd"]) - cmdline += ["--env=" + "'PYTHONUSERBASE=" + f"{setup['paths']['install']}" + "'"] + cmdline += ["--env=" + "PYTHONUSERBASE=" + f"{setup['paths']['install']}"] if "env" in setup["execenv"]: - cmdline += [f'--env="{var}={val}"' for var, val in setup["execenv"]["env"].items()] + cmdline += [f"--env={var}={val}" for var, val in setup["execenv"]["env"].items()] cmdline += shlex.split(setup["execenv"]["arg"]) From 0b558ddb43988f1134e58d9dce61c9c8b1b295ea Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 21:22:28 +0100 Subject: [PATCH 045/101] fix weird filelist len bug by moving to script --- Snakefile-build-raw | 13 ++----------- scripts/write_filelist.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 11 deletions(-) create mode 100644 scripts/write_filelist.py diff --git a/Snakefile-build-raw b/Snakefile-build-raw index 7a4779f..c4fb1dd 100644 --- a/Snakefile-build-raw +++ b/Snakefile-build-raw @@ -94,17 +94,8 @@ rule gen_filelist: ), output: temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), - run: - print(f"INFO: found {len(input)} files") - if len(input) == 0: - print( - f"WARNING: no DAQ files found for the given pattern: {wildcards.label}. " - "make sure patterns follows the format: " - "all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" - ) - with open(output[0], "w") as f: - for fn in input: - f.write(f"{fn}\n") + script: + "scripts/write_filelist.py" rule sort_data: diff --git a/scripts/write_filelist.py b/scripts/write_filelist.py new file mode 100644 index 0000000..f27c2ad --- /dev/null +++ b/scripts/write_filelist.py @@ -0,0 +1,14 @@ +# ruff: noqa: F821, T201 +# from snakemake.script import snakemake # snakemake > 8.16 +from pathlib import Path + +print(f"INFO: found {len(snakemake.input)} files") +if len(snakemake.input) == 0: + print( + f"WARNING: no DAQ files found for the given pattern: {snakemake.wildcards.label}. 
" + "make sure patterns follows the format: " + "all-{experiment}-{period}-{run}-{datatype}-{timestamp}-{tier}.gen" + ) +with Path(snakemake.output[0]).open("w") as f: + for fn in snakemake.input: + f.write(f"{fn}\n") From 689164bcc2ecee28bbead6d7c83f30d6dca7d6e4 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 20 Jan 2025 23:30:50 +0100 Subject: [PATCH 046/101] fix log import --- scripts/build_dsp.py | 2 +- scripts/build_hit.py | 2 +- scripts/build_raw_orca.py | 2 +- scripts/build_skm.py | 2 +- scripts/build_tcm.py | 2 +- scripts/pars_dsp_build_svm.py | 2 +- scripts/pars_dsp_dplms.py | 2 +- scripts/pars_dsp_eopt.py | 2 +- scripts/pars_dsp_event_selection.py | 2 +- scripts/pars_dsp_nopt.py | 2 +- scripts/pars_dsp_tau.py | 2 +- scripts/pars_hit_aoe.py | 2 +- scripts/pars_hit_ecal.py | 2 +- scripts/pars_hit_lq.py | 2 +- scripts/pars_hit_qc.py | 2 +- scripts/pars_pht_aoecal.py | 2 +- scripts/pars_pht_fast.py | 2 +- scripts/pars_pht_lqcal.py | 2 +- scripts/pars_pht_partcal.py | 2 +- scripts/pars_pht_qc.py | 2 +- scripts/pars_pht_qc_phy.py | 2 +- scripts/pars_tcm_pulser.py | 2 +- 22 files changed, 22 insertions(+), 22 deletions(-) diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index f7b4141..603124d 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -9,7 +9,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 -from utils.log import build_log +from util.log import build_log def replace_list_with_array(dic): diff --git a/scripts/build_hit.py b/scripts/build_hit.py index cec39b7..cd48f7c 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -6,7 +6,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from pygama.hit.build_hit import build_hit -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) diff --git a/scripts/build_raw_orca.py b/scripts/build_raw_orca.py index b307b01..711ecdd 100644 --- a/scripts/build_raw_orca.py +++ b/scripts/build_raw_orca.py @@ -6,7 +6,7 @@ from daq2lh5 import build_raw from dbetto import TextDB from dbetto.catalog import Props -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/scripts/build_skm.py b/scripts/build_skm.py index c8ff972..cfd52e0 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -5,7 +5,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from lgdo.types import Array, Struct, Table, VectorOfVectors -from utils.log import build_log +from util.log import build_log def get_all_out_fields(input_table, out_fields, current_field=""): diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index 4707410..156e4c8 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -7,7 +7,7 @@ from legendmeta import TextDB from legendmeta.catalog import Props from pygama.evt.build_tcm import build_tcm -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index a31a8c1..b9174ec 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -6,7 +6,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from sklearn.svm import SVC -from utils.log import build_log +from util.log import build_log argparser 
= argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 64c7a9f..3e99228 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -10,7 +10,7 @@ from legendmeta.catalog import Props from lgdo import Array, Table from pygama.pargen.dplms_ge_dict import dplms_ge_dict -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 5e9a009..c95842d 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -17,7 +17,7 @@ run_bayesian_optimisation, run_one_dsp, ) -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 9999134..d5a924c 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -14,7 +14,7 @@ from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 85883b8..766159c 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -10,7 +10,7 @@ from legendmeta.catalog import Props from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp -from utils.log import build_log +from util.log import build_log sto = lh5.LH5Store() diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index 4f3cf9d..b45a801 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -9,7 +9,7 @@ from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.extract_tau import ExtractTau -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index 4d3f503..c61322c 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -15,7 +15,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index aab5f41..b8ba61a 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -23,7 +23,7 @@ from pygama.pargen.utils import load_data from scipy.stats import binned_statistic from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) mpl.use("agg") diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 3487c38..48811ad 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -16,7 +16,7 @@ from pygama.pargen.lq_cal import LQCal from 
pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 6b3369f..d68aaeb 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -19,7 +19,7 @@ ) from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index 91ae176..0591f53 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -18,7 +18,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index b8d48d2..f916ad3 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -16,7 +16,7 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 101acea..7185ab1 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -18,7 +18,7 @@ from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 6eb25eb..228107e 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -18,7 +18,7 @@ from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from util.FileKey import ChannelProcKey, ProcessingFileKey -from utils.log import build_log +from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index f3f634b..e79014f 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -19,7 +19,7 @@ ) from pygama.pargen.utils import load_data from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index e642aa3..628a104 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -18,7 +18,7 @@ get_keys, ) from util.convert_np import convert_dict_np_to_float -from utils.log import build_log +from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index 4ae8843..c48338a 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -7,7 +7,7 @@ from legendmeta import 
LegendMetadata, TextDB from legendmeta.catalog import Props from pygama.pargen.data_cleaning import get_tcm_pulser_ids -from utils.log import build_log +from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) From 2c47ca94d71090a1eba293007f5e79c4441b0b46 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 21 Jan 2025 14:43:55 +0100 Subject: [PATCH 047/101] Remove leftover print statements --- scripts/complete_run.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/complete_run.py b/scripts/complete_run.py index 03cfd51..c462367 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -208,9 +208,6 @@ def build_file_dbs(gen_tier_path, outdir): if speck[0] == "phy": cmdline += ["--assume-nonsparse"] - print(cmdline) - print(" ".join(cmdline)) - cmdenv = {} # TODO: forward stdout to log file From 35e8b562542c88243a6f6a87aecf8c96a7496726 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 22 Jan 2025 00:00:11 +0100 Subject: [PATCH 048/101] Rename scripts.util to scripts.library --- .ruff.toml | 1 - Snakefile | 10 +++++----- rules/ann.smk | 2 +- rules/blinding_calibration.smk | 2 +- rules/blinding_check.smk | 2 +- rules/chanlist_gen.smk | 6 +++--- rules/common.smk | 8 ++++---- rules/dsp.smk | 8 ++++---- rules/evt.smk | 4 ++-- rules/filelist_gen.smk | 4 ++-- rules/hit.smk | 6 +++--- rules/main.smk | 2 +- rules/pht.smk | 8 ++++---- rules/pht_fast.smk | 8 ++++---- rules/psp.smk | 8 ++++---- rules/qc_phy.smk | 8 ++++---- rules/raw.smk | 6 +++--- rules/skm.smk | 2 +- rules/tcm.smk | 2 +- scripts/build_dsp.py | 2 +- scripts/build_evt.py | 2 +- scripts/build_hit.py | 2 +- scripts/build_raw_blind.py | 2 +- scripts/build_raw_fcio.py | 2 +- scripts/build_raw_orca.py | 2 +- scripts/build_skm.py | 2 +- scripts/build_tcm.py | 2 +- scripts/check_blinding.py | 2 +- scripts/complete_run.py | 6 +++--- scripts/{util => library}/FileKey.py | 0 scripts/{util => library}/__init__.py | 0 scripts/{util => library}/cal_grouping.py | 0 scripts/{util => library}/catalog.py | 0 scripts/{util => library}/convert_np.py | 0 scripts/{util => library}/create_pars_keylist.py | 0 scripts/{util => library}/log.py | 0 scripts/{util => library}/pars_loading.py | 0 scripts/{util => library}/patterns.py | 0 scripts/{util => library}/utils.py | 0 scripts/merge_channels.py | 2 +- scripts/par_psp.py | 2 +- scripts/pars_dsp_build_svm.py | 2 +- scripts/pars_dsp_dplms.py | 2 +- scripts/pars_dsp_eopt.py | 2 +- scripts/pars_dsp_event_selection.py | 2 +- scripts/pars_dsp_nopt.py | 2 +- scripts/pars_dsp_tau.py | 2 +- scripts/pars_hit_aoe.py | 4 ++-- scripts/pars_hit_ecal.py | 4 ++-- scripts/pars_hit_lq.py | 4 ++-- scripts/pars_hit_qc.py | 4 ++-- scripts/pars_pht_aoecal.py | 4 ++-- scripts/pars_pht_fast.py | 4 ++-- scripts/pars_pht_lqcal.py | 4 ++-- scripts/pars_pht_partcal.py | 4 ++-- scripts/pars_pht_qc.py | 4 ++-- scripts/pars_pht_qc_phy.py | 4 ++-- scripts/pars_tcm_pulser.py | 2 +- tests/test_util.py | 6 +++--- 59 files changed, 92 insertions(+), 93 deletions(-) rename scripts/{util => library}/FileKey.py (100%) rename scripts/{util => library}/__init__.py (100%) rename scripts/{util => library}/cal_grouping.py (100%) rename scripts/{util => library}/catalog.py (100%) rename scripts/{util => library}/convert_np.py (100%) rename scripts/{util => library}/create_pars_keylist.py (100%) rename scripts/{util => library}/log.py (100%) rename scripts/{util => library}/pars_loading.py (100%) rename scripts/{util => 
library}/patterns.py (100%) rename scripts/{util => library}/utils.py (100%) diff --git a/.ruff.toml b/.ruff.toml index 8b4d420..bd28747 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -29,7 +29,6 @@ lint.extend-ignore = [ "G004", # Logging statement uses f-string "PLR", # Design related pylint codes "E501", # Line too long - "PT004", # Use underscore for non-returning fixture (use usefixture instead) "RET505", # Unnecessary `else` after `return` statement "E402", # Module level imports at top of file need disable for numba default imports "NPY201", # Numpy 2 warnings ignore for the moment diff --git a/Snakefile b/Snakefile index f9168c3..10a6855 100644 --- a/Snakefile +++ b/Snakefile @@ -18,10 +18,10 @@ from datetime import datetime from collections import OrderedDict import logging -import scripts.util as ds -from scripts.util.pars_loading import ParsCatalog -from scripts.util.patterns import get_pattern_tier -from scripts.util.utils import ( +import scripts.library as lib +from scripts.library.pars_loading import ParsCatalog +from scripts.library.patterns import get_pattern_tier +from scripts.library.utils import ( subst_vars_in_snakemake_config, runcmd, config_path, @@ -45,7 +45,7 @@ chan_maps = chan_map_path(setup) meta = metadata_path(setup) det_status = det_status_path(setup) swenv = runcmd(setup) -part = ds.CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") +part = lib.CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") basedir = workflow.basedir diff --git a/rules/ann.smk b/rules/ann.smk index 15558ae..f1a47cd 100644 --- a/rules/ann.smk +++ b/rules/ann.smk @@ -4,7 +4,7 @@ to apply the ann and risetime cuts for psd. """ -from scripts.util.patterns import ( +from scripts.library.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars, diff --git a/rules/blinding_calibration.smk b/rules/blinding_calibration.smk index 85ee2f6..d28072f 100644 --- a/rules/blinding_calibration.smk +++ b/rules/blinding_calibration.smk @@ -4,7 +4,7 @@ Snakemake rules for calibrating daq energy for blinding. Two steps: - combining all channels into single par file """ -from scripts.util.patterns import ( +from scripts.library.patterns import ( get_pattern_pars, get_pattern_plts, get_pattern_pars_tmp_channel, diff --git a/rules/blinding_check.smk b/rules/blinding_check.smk index eb3407d..e556abb 100644 --- a/rules/blinding_check.smk +++ b/rules/blinding_check.smk @@ -4,7 +4,7 @@ Snakemake rules for checking blinding. 
Two steps: - combining all channel check files into single check file """ -from scripts.util.patterns import ( +from scripts.library.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/rules/chanlist_gen.smk b/rules/chanlist_gen.smk index 820d0fa..68c33e4 100644 --- a/rules/chanlist_gen.smk +++ b/rules/chanlist_gen.smk @@ -4,12 +4,12 @@ import os import random import re -from scripts.util.FileKey import ChannelProcKey -from scripts.util.patterns import ( +from scripts.library.FileKey import ChannelProcKey +from scripts.library.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, ) -from scripts.util.utils import filelist_path, runcmd +from scripts.library.utils import filelist_path, runcmd def get_par_chanlist( diff --git a/rules/common.smk b/rules/common.smk index 5625c79..2f8a82f 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -3,16 +3,16 @@ Helper functions for running data production """ from pathlib import Path -from scripts.util.patterns import ( +from scripts.library.patterns import ( get_pattern_tier_daq_unsorted, get_pattern_tier_daq, get_pattern_tier, par_overwrite_path, get_pars_path, ) -from scripts.util import ProcessingFileKey -from scripts.util.catalog import Catalog -from scripts.util import utils +from scripts.library import ProcessingFileKey +from scripts.library.catalog import Catalog +from scripts.library import utils def ro(path): diff --git a/rules/dsp.smk b/rules/dsp.smk index 66a18c8..8000fa2 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -6,11 +6,11 @@ Snakemake rules for processing dsp tier. This is done in 4 steps: - running dsp over all channels using par file """ -from scripts.util.pars_loading import ParsCatalog -from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.library.pars_loading import ParsCatalog +from scripts.library.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.util.create_pars_keylist import ParsKeyResolve -from scripts.util.patterns import ( +from scripts.library.create_pars_keylist import ParsKeyResolve +from scripts.library.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/rules/evt.smk b/rules/evt.smk index 112c92c..4e96a85 100644 --- a/rules/evt.smk +++ b/rules/evt.smk @@ -2,8 +2,8 @@ Snakemake rules for processing evt tier. """ -from scripts.util.pars_loading import ParsCatalog -from scripts.util.patterns import ( +from scripts.library.pars_loading import ParsCatalog +from scripts.library.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars, diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index b3255f8..5e1857f 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -2,8 +2,8 @@ import glob import json, yaml from pathlib import Path -from scripts.util.FileKey import FileKey, run_grouper -from scripts.util.patterns import ( +from scripts.library.FileKey import FileKey, run_grouper +from scripts.library.patterns import ( get_pattern_tier, get_pattern_tier_raw_blind, get_pattern_tier_daq, diff --git a/rules/hit.smk b/rules/hit.smk index bb42651..5ea14ff 100644 --- a/rules/hit.smk +++ b/rules/hit.smk @@ -6,10 +6,10 @@ Snakemake rules for processing hit tier. 
This is done in 4 steps: - running build hit over all channels using par file """ -from scripts.util.pars_loading import ParsCatalog -from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.library.pars_loading import ParsCatalog +from scripts.library.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.util.patterns import ( +from scripts.library.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/rules/main.smk b/rules/main.smk index be671c0..f227f17 100644 --- a/rules/main.smk +++ b/rules/main.smk @@ -1,6 +1,6 @@ import os from datetime import datetime -from scripts.util.utils import ( +from scripts.library.utils import ( filelist_path, log_path, tmp_par_path, diff --git a/rules/pht.smk b/rules/pht.smk index e638832..239e3c5 100644 --- a/rules/pht.smk +++ b/rules/pht.smk @@ -6,11 +6,11 @@ Snakemake rules for processing pht (partition hit) tier data. This is done in 4 - running build hit over all channels using par file """ -from scripts.util.pars_loading import ParsCatalog -from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.library.pars_loading import ParsCatalog +from scripts.library.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.util.utils import filelist_path, set_last_rule_name -from scripts.util.patterns import ( +from scripts.library.utils import filelist_path, set_last_rule_name +from scripts.library.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/rules/pht_fast.smk b/rules/pht_fast.smk index 9369b6b..b177f12 100644 --- a/rules/pht_fast.smk +++ b/rules/pht_fast.smk @@ -1,7 +1,7 @@ -from scripts.util.pars_loading import ParsCatalog -from scripts.util.create_pars_keylist import ParsKeyResolve -from scripts.util.utils import filelist_path, set_last_rule_name -from scripts.util.patterns import ( +from scripts.library.pars_loading import ParsCatalog +from scripts.library.create_pars_keylist import ParsKeyResolve +from scripts.library.utils import filelist_path, set_last_rule_name +from scripts.library.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/rules/psp.smk b/rules/psp.smk index bde834d..eed63ae 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -6,11 +6,11 @@ Snakemake rules for processing pht (partition hit) tier data. 
This is done in 4 - running build hit over all channels using par file """ -from scripts.util.pars_loading import ParsCatalog -from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.library.pars_loading import ParsCatalog +from scripts.library.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.util.utils import set_last_rule_name -from scripts.util.patterns import ( +from scripts.library.utils import set_last_rule_name +from scripts.library.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/rules/qc_phy.smk b/rules/qc_phy.smk index b89d8d3..d7a10f4 100644 --- a/rules/qc_phy.smk +++ b/rules/qc_phy.smk @@ -1,7 +1,7 @@ -from scripts.util.pars_loading import ParsCatalog -from scripts.util.create_pars_keylist import ParsKeyResolve -from scripts.util.utils import filelist_path, set_last_rule_name -from scripts.util.patterns import ( +from scripts.library.pars_loading import ParsCatalog +from scripts.library.create_pars_keylist import ParsKeyResolve +from scripts.library.utils import filelist_path, set_last_rule_name +from scripts.library.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/rules/raw.smk b/rules/raw.smk index fd95467..17d1e3b 100644 --- a/rules/raw.smk +++ b/rules/raw.smk @@ -1,12 +1,12 @@ -from scripts.util.patterns import ( +from scripts.library.patterns import ( get_pattern_tier_daq_unsorted, get_pattern_tier_daq, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, ) -from scripts.util.utils import set_last_rule_name -from scripts.util.create_pars_keylist import ParsKeyResolve +from scripts.library.utils import set_last_rule_name +from scripts.library.create_pars_keylist import ParsKeyResolve raw_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], diff --git a/rules/skm.smk b/rules/skm.smk index 3c620bf..404b81b 100644 --- a/rules/skm.smk +++ b/rules/skm.smk @@ -2,7 +2,7 @@ Snakemake rules for processing skm tier. 
""" -from scripts.util.patterns import ( +from scripts.library.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars, diff --git a/rules/tcm.smk b/rules/tcm.smk index e3a3410..941455d 100644 --- a/rules/tcm.smk +++ b/rules/tcm.smk @@ -2,7 +2,7 @@ Snakemake file containing the rules for generating the tcm """ -from scripts.util.patterns import ( +from scripts.library.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars_tmp_channel, diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index 603124d..aee335c 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -9,7 +9,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 -from util.log import build_log +from library.log import build_log def replace_list_with_array(dic): diff --git a/scripts/build_evt.py b/scripts/build_evt.py index e56912b..6ef1e0f 100644 --- a/scripts/build_evt.py +++ b/scripts/build_evt.py @@ -8,8 +8,8 @@ from dbetto import Props, TextDB from legendmeta import LegendMetadata from lgdo.types import Array +from library.log import build_log from pygama.evt import build_evt -from util.log import build_log sto = lh5.LH5Store() diff --git a/scripts/build_hit.py b/scripts/build_hit.py index cd48f7c..776bd59 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -5,8 +5,8 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 +from library.log import build_log from pygama.hit.build_hit import build_hit -from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 0b036dd..961b86a 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -18,7 +18,7 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 -from util.log import build_log +from library.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) diff --git a/scripts/build_raw_fcio.py b/scripts/build_raw_fcio.py index b4d2e22..ddc765c 100644 --- a/scripts/build_raw_fcio.py +++ b/scripts/build_raw_fcio.py @@ -6,7 +6,7 @@ from daq2lh5 import build_raw from dbetto import TextDB from dbetto.catalog import Props -from util.log import build_log +from library.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/scripts/build_raw_orca.py b/scripts/build_raw_orca.py index 711ecdd..0f5bbcb 100644 --- a/scripts/build_raw_orca.py +++ b/scripts/build_raw_orca.py @@ -6,7 +6,7 @@ from daq2lh5 import build_raw from dbetto import TextDB from dbetto.catalog import Props -from util.log import build_log +from library.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/scripts/build_skm.py b/scripts/build_skm.py index cfd52e0..f674e5d 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -5,7 +5,7 @@ from legendmeta.catalog import Props from lgdo import lh5 from lgdo.types import Array, Struct, Table, VectorOfVectors -from util.log import build_log +from library.log import build_log def get_all_out_fields(input_table, out_fields, current_field=""): diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index 156e4c8..c16b3c4 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ 
-6,8 +6,8 @@ from daq2lh5.orca import orca_flashcam from legendmeta import TextDB from legendmeta.catalog import Props +from library.log import build_log from pygama.evt.build_tcm import build_tcm -from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index f7b8dac..0b66c93 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -17,9 +17,9 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import lh5 +from library.log import build_log from pygama.math.histogram import get_hist from pygama.pargen.energy_cal import get_i_local_maxima -from util.log import build_log mpl.use("Agg") diff --git a/scripts/complete_run.py b/scripts/complete_run.py index c462367..7ffd73a 100644 --- a/scripts/complete_run.py +++ b/scripts/complete_run.py @@ -7,9 +7,9 @@ import time from pathlib import Path -import util.utils as ut -from util import patterns -from util.FileKey import FileKey +import library.utils as ut +from library import patterns +from library.FileKey import FileKey print("INFO: dataflow ran successfully, now few final checks and scripts") diff --git a/scripts/util/FileKey.py b/scripts/library/FileKey.py similarity index 100% rename from scripts/util/FileKey.py rename to scripts/library/FileKey.py diff --git a/scripts/util/__init__.py b/scripts/library/__init__.py similarity index 100% rename from scripts/util/__init__.py rename to scripts/library/__init__.py diff --git a/scripts/util/cal_grouping.py b/scripts/library/cal_grouping.py similarity index 100% rename from scripts/util/cal_grouping.py rename to scripts/library/cal_grouping.py diff --git a/scripts/util/catalog.py b/scripts/library/catalog.py similarity index 100% rename from scripts/util/catalog.py rename to scripts/library/catalog.py diff --git a/scripts/util/convert_np.py b/scripts/library/convert_np.py similarity index 100% rename from scripts/util/convert_np.py rename to scripts/library/convert_np.py diff --git a/scripts/util/create_pars_keylist.py b/scripts/library/create_pars_keylist.py similarity index 100% rename from scripts/util/create_pars_keylist.py rename to scripts/library/create_pars_keylist.py diff --git a/scripts/util/log.py b/scripts/library/log.py similarity index 100% rename from scripts/util/log.py rename to scripts/library/log.py diff --git a/scripts/util/pars_loading.py b/scripts/library/pars_loading.py similarity index 100% rename from scripts/util/pars_loading.py rename to scripts/library/pars_loading.py diff --git a/scripts/util/patterns.py b/scripts/library/patterns.py similarity index 100% rename from scripts/util/patterns.py rename to scripts/library/patterns.py diff --git a/scripts/util/utils.py b/scripts/library/utils.py similarity index 100% rename from scripts/util/utils.py rename to scripts/library/utils.py diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index bed04d2..6a99062 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -7,7 +7,7 @@ from legendmeta import LegendMetadata from legendmeta.catalog import Props from lgdo import lh5 -from util.FileKey import ChannelProcKey +from library.FileKey import ChannelProcKey def replace_path(d, old_path, new_path): diff --git a/scripts/par_psp.py b/scripts/par_psp.py index a7dfbb2..5ae65c9 100644 --- a/scripts/par_psp.py +++ b/scripts/par_psp.py @@ -9,7 +9,7 @@ import numpy as np from legendmeta import LegendMetadata 
from legendmeta.catalog import Props -from util.FileKey import ChannelProcKey +from library.FileKey import ChannelProcKey mpl.use("Agg") diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index b9174ec..3a01d1c 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -5,8 +5,8 @@ from legendmeta import TextDB from legendmeta.catalog import Props from lgdo import lh5 +from library.log import build_log from sklearn.svm import SVC -from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index 3e99228..dc38ad3 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -9,8 +9,8 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo import Array, Table +from library.log import build_log from pygama.pargen.dplms_ge_dict import dplms_ge_dict -from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index c95842d..14f1b05 100644 --- a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -11,13 +11,13 @@ from dspeed.units import unit_registry as ureg from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.log import build_log from pygama.math.distributions import hpge_peak from pygama.pargen.dsp_optimize import ( BayesianOptimizer, run_bayesian_optimisation, run_one_dsp, ) -from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index d5a924c..64964c2 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -12,9 +12,9 @@ import pygama.pargen.energy_cal as pgc from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.log import build_log from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp -from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index 766159c..f1f0f5c 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -8,9 +8,9 @@ import pygama.pargen.noise_optimization as pno from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.log import build_log from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp -from util.log import build_log sto = lh5.LH5Store() diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index b45a801..1ac3451 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -6,10 +6,10 @@ import numpy as np from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.log import build_log from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.extract_tau import ExtractTau -from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) 
diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index c61322c..bfc681f 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -10,12 +10,12 @@ import pandas as pd from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.convert_np import convert_dict_np_to_float +from library.log import build_log from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data -from util.convert_np import convert_dict_np_to_float -from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index b8ba61a..87a6afd 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -16,14 +16,14 @@ import pygama.math.histogram as pgh from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.convert_np import convert_dict_np_to_float +from library.log import build_log from matplotlib.colors import LogNorm from pygama.math.distributions import nb_poly from pygama.pargen.data_cleaning import get_mode_stdev, get_tcm_pulser_ids from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from scipy.stats import binned_statistic -from util.convert_np import convert_dict_np_to_float -from util.log import build_log log = logging.getLogger(__name__) mpl.use("agg") diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index 48811ad..db721af 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -9,14 +9,14 @@ import pandas as pd from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.convert_np import convert_dict_np_to_float +from library.log import build_log from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data -from util.convert_np import convert_dict_np_to_float -from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index d68aaeb..97a2720 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -12,14 +12,14 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls +from library.convert_np import convert_dict_np_to_float +from library.log import build_log from pygama.pargen.data_cleaning import ( generate_cut_classifiers, get_keys, get_tcm_pulser_ids, ) from pygama.pargen.utils import load_data -from util.convert_np import convert_dict_np_to_float -from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index 0591f53..bd6d484 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -13,12 +13,12 @@ import pandas as pd from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.FileKey import ChannelProcKey, ProcessingFileKey +from library.log import build_log from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import 
get_tcm_pulser_ids from pygama.pargen.utils import load_data -from util.FileKey import ChannelProcKey, ProcessingFileKey -from util.log import build_log log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index f916ad3..1db32ad 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -10,13 +10,13 @@ import pandas as pd from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.FileKey import ChannelProcKey, ProcessingFileKey +from library.log import build_log from pars_pht_aoecal import run_aoe_calibration from pars_pht_lqcal import run_lq_calibration from pars_pht_partcal import calibrate_partition from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data -from util.FileKey import ChannelProcKey, ProcessingFileKey -from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 7185ab1..9e2f29a 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -11,14 +11,14 @@ import pandas as pd from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.FileKey import ChannelProcKey, ProcessingFileKey +from library.log import build_log from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data -from util.FileKey import ChannelProcKey, ProcessingFileKey -from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 228107e..4915494 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -13,12 +13,12 @@ import pygama.math.histogram as pgh from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.FileKey import ChannelProcKey, ProcessingFileKey +from library.log import build_log from pygama.math.distributions import nb_poly from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data -from util.FileKey import ChannelProcKey, ProcessingFileKey -from util.log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index e79014f..02afade 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -12,14 +12,14 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls +from library.convert_np import convert_dict_np_to_float +from library.log import build_log from pygama.pargen.data_cleaning import ( generate_cut_classifiers, get_keys, get_tcm_pulser_ids, ) from pygama.pargen.utils import load_data -from util.convert_np import convert_dict_np_to_float -from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 628a104..86328dc 100644 --- a/scripts/pars_pht_qc_phy.py +++ 
b/scripts/pars_pht_qc_phy.py @@ -13,12 +13,12 @@ from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props from lgdo.lh5 import ls +from library.convert_np import convert_dict_np_to_float +from library.log import build_log from pygama.pargen.data_cleaning import ( generate_cut_classifiers, get_keys, ) -from util.convert_np import convert_dict_np_to_float -from util.log import build_log log = logging.getLogger(__name__) diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index c48338a..f230ad0 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -6,8 +6,8 @@ import numpy as np from legendmeta import LegendMetadata, TextDB from legendmeta.catalog import Props +from library.log import build_log from pygama.pargen.data_cleaning import get_tcm_pulser_ids -from util.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) diff --git a/tests/test_util.py b/tests/test_util.py index 010c749..acaf609 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,7 +1,7 @@ import json from pathlib import Path -from scripts.util import ( +from scripts.library import ( CalibCatalog, FileKey, pars_catalog, @@ -9,8 +9,8 @@ subst_vars, unix_time, ) -from scripts.util.patterns import get_pattern_tier_daq, get_pattern_tier_dsp -from scripts.util.utils import ( +from scripts.library.patterns import get_pattern_tier_daq, get_pattern_tier_dsp +from scripts.library.utils import ( par_dsp_path, par_overwrite_path, tier_dsp_path, From 0bb23dfbfa32444cdd987a96bc41663d1d018626 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 22 Jan 2025 00:10:29 +0100 Subject: [PATCH 049/101] Replace scripts.library.catalog with dbetto.catalog --- rules/common.smk | 2 +- rules/filelist_gen.smk | 24 ++-- scripts/blinding_calibration.py | 2 +- scripts/build_dsp.py | 5 +- scripts/build_hit.py | 2 +- scripts/build_raw_blind.py | 2 +- scripts/build_skm.py | 4 +- scripts/build_tcm.py | 4 +- scripts/check_blinding.py | 5 +- scripts/create_chankeylist.py | 3 +- scripts/library/catalog.py | 191 ---------------------------- scripts/library/pars_loading.py | 3 +- scripts/merge_channels.py | 2 +- scripts/par_psp.py | 2 +- scripts/pars_dsp_build_svm.py | 4 +- scripts/pars_dsp_dplms.py | 5 +- scripts/pars_dsp_eopt.py | 5 +- scripts/pars_dsp_event_selection.py | 5 +- scripts/pars_dsp_nopt.py | 5 +- scripts/pars_dsp_svm.py | 2 +- scripts/pars_dsp_tau.py | 5 +- scripts/pars_hit_aoe.py | 5 +- scripts/pars_hit_ecal.py | 5 +- scripts/pars_hit_lq.py | 5 +- scripts/pars_hit_qc.py | 5 +- scripts/pars_pht_aoecal.py | 5 +- scripts/pars_pht_fast.py | 5 +- scripts/pars_pht_lqcal.py | 5 +- scripts/pars_pht_partcal.py | 5 +- scripts/pars_pht_qc.py | 5 +- scripts/pars_pht_qc_phy.py | 5 +- scripts/pars_tcm_pulser.py | 5 +- 32 files changed, 86 insertions(+), 251 deletions(-) delete mode 100644 scripts/library/catalog.py diff --git a/rules/common.smk b/rules/common.smk index 2f8a82f..4f99d5c 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -11,7 +11,7 @@ from scripts.library.patterns import ( get_pars_path, ) from scripts.library import ProcessingFileKey -from scripts.library.catalog import Catalog +from dbetto.catalog import Catalog from scripts.library import utils diff --git a/rules/filelist_gen.smk b/rules/filelist_gen.smk index 5e1857f..5d1f928 100644 --- a/rules/filelist_gen.smk +++ b/rules/filelist_gen.smk @@ -22,10 +22,13 @@ def expand_runs(in_dict): "p01": "r001..r005" } """ - for per, 
run_list in in_dict.items(): - if isinstance(run_list, str) and ".." in runs: - start, end = runs.split("..") - in_dict[per] = [f"r{x:03}" for x in range(int(start[1:]), int(end[1:]) + 1)] + for per, datalist in in_dict.items(): + for datatype, run_list in datalist.items(): + if isinstance(run_list, str) and ".." in runs: + start, end = runs.split("..") + in_dict[per][datatype] = [ + f"r{x:03}" for x in range(int(start[1:]), int(end[1:]) + 1) + ] return in_dict @@ -200,14 +203,17 @@ def build_filelist( other_filenames += filename else: if ( - _key.period - in analysis_runs # check if period in analysis_runs dicts + _key.datatype in analysis_runs + and _key.period + in analysis_runs[ + _key.datatype + ] # check if period in analysis_runs dicts and ( _key.run - in analysis_runs[ - _key.period + in analysis_runs[_key.period][ + _key.datatype ] # check if run in analysis_runs dicts - or analysis_runs[_key.period] + or analysis_runs[_key.period][_key.datatype] == "all" # or if runs is just specified as "all" ) ): diff --git a/scripts/blinding_calibration.py b/scripts/blinding_calibration.py index 072e756..4a666cc 100644 --- a/scripts/blinding_calibration.py +++ b/scripts/blinding_calibration.py @@ -12,8 +12,8 @@ import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np +from dbetto.catalog import Props from legendmeta import LegendMetadata -from legendmeta.catalog import Props from lgdo import lh5 from pygama.pargen.energy_cal import HPGeCalibration diff --git a/scripts/build_dsp.py b/scripts/build_dsp.py index aee335c..6f97406 100644 --- a/scripts/build_dsp.py +++ b/scripts/build_dsp.py @@ -5,9 +5,10 @@ from pathlib import Path import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props from dspeed import build_dsp -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from legendmeta import LegendMetadata from lgdo import lh5 from library.log import build_log diff --git a/scripts/build_hit.py b/scripts/build_hit.py index 776bd59..6310521 100644 --- a/scripts/build_hit.py +++ b/scripts/build_hit.py @@ -2,8 +2,8 @@ import time from pathlib import Path +from dbetto.catalog import Props from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props from lgdo import lh5 from library.log import build_log from pygama.hit.build_hit import build_hit diff --git a/scripts/build_raw_blind.py b/scripts/build_raw_blind.py index 961b86a..e343bde 100644 --- a/scripts/build_raw_blind.py +++ b/scripts/build_raw_blind.py @@ -15,8 +15,8 @@ import numexpr as ne import numpy as np +from dbetto.catalog import Props from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props from lgdo import lh5 from library.log import build_log diff --git a/scripts/build_skm.py b/scripts/build_skm.py index f674e5d..aefc31b 100644 --- a/scripts/build_skm.py +++ b/scripts/build_skm.py @@ -1,8 +1,8 @@ import argparse import awkward as ak -from legendmeta import TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props from lgdo import lh5 from lgdo.types import Array, Struct, Table, VectorOfVectors from library.log import build_log diff --git a/scripts/build_tcm.py b/scripts/build_tcm.py index c16b3c4..2718c00 100644 --- a/scripts/build_tcm.py +++ b/scripts/build_tcm.py @@ -4,8 +4,8 @@ import lgdo.lh5 as lh5 import numpy as np from daq2lh5.orca import orca_flashcam -from legendmeta import TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from 
dbetto.catalog import Props from library.log import build_log from pygama.evt.build_tcm import build_tcm diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index 0b66c93..f5dd378 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -14,8 +14,9 @@ import matplotlib.pyplot as plt import numexpr as ne import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from lgdo import lh5 from library.log import build_log from pygama.math.histogram import get_hist diff --git a/scripts/create_chankeylist.py b/scripts/create_chankeylist.py index f01c879..a75be8b 100644 --- a/scripts/create_chankeylist.py +++ b/scripts/create_chankeylist.py @@ -1,7 +1,8 @@ import argparse from pathlib import Path -from legendmeta import LegendMetadata, TextDB +from dbetto import TextDB +from legendmeta import LegendMetadata argparser = argparse.ArgumentParser() argparser.add_argument("--det_status", help="det_status", type=str, required=True) diff --git a/scripts/library/catalog.py b/scripts/library/catalog.py deleted file mode 100644 index 739e21a..0000000 --- a/scripts/library/catalog.py +++ /dev/null @@ -1,191 +0,0 @@ -# -# Copyright (C) 2015 Oliver Schulz -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -""" -This module stores the scripts for leading validity files based on timestamp and system -""" - -import bisect -import collections -import copy -import json -import types -from collections import namedtuple -from pathlib import Path - -import yaml - -from .utils import unix_time - - -class Props: - @staticmethod - def read_from(sources): - def read_impl(sources): - if isinstance(sources, (str, Path)): - file_name = sources - if isinstance(file_name, str): - file_name = Path(file_name) - if file_name.suffix in (".yaml", ".yml"): - with file_name.open() as file: - return yaml.safe_load(file) - elif file_name.suffix == ".json": - with file_name.open() as file: - return json.load(file) - else: - msg = f"Can't run Props.read_from on file with suffix {file_name.suffix}" - raise ValueError(msg) - elif isinstance(sources, list): - result = {} - for p in map(read_impl, sources): - Props.add_to(result, p) - return result - else: - msg = f"Can't run Props.read_from on sources-value of type {type(sources)}" - raise ValueError(msg) - - return read_impl(sources) - - @staticmethod - def add_to(props_a, props_b): - a = props_a - b = props_b - - for key in b: - if key in a: - if isinstance(a[key], dict) and isinstance(b[key], dict): - Props.add_to(a[key], b[key]) - elif a[key] != b[key]: - a[key] = copy.copy(b[key]) - else: - a[key] = copy.copy(b[key]) - - -class PropsStream: - """Simple class to control loading of validity.yaml files""" - - @staticmethod - def get(value): - if isinstance(value, (str, Path)): - return PropsStream.read_from(value) - - if isinstance(value, (collections.abc.Sequence, types.GeneratorType)): - return value - - msg = f"Can't get PropsStream from value of type {type(value)}" - raise ValueError(msg) - - @staticmethod - def read_from(file_name): - with Path(file_name).open() as r: - file = yaml.safe_load(r) - file = sorted(file, key=lambda item: unix_time(item["valid_from"])) - yield from file - - -class Catalog(namedtuple("Catalog", ["entries"])): - """Implementation of the `YAML metadata validity specification `_.""" - - __slots__ = () - - class Entry(namedtuple("Entry", ["valid_from", "file"])): - __slots__ = () - - @staticmethod - def get(value): - if isinstance(value, Catalog): - return value - - if isinstance(value, str): - return Catalog.read_from(value) - - msg = f"Can't get Catalog from value of type {type(value)}" - raise ValueError(msg) - - @staticmethod - def read_from(file_name): - """Read from a valdiity YAML file and build a Catalog object""" - entries = {} - for props in PropsStream.get(file_name): - timestamp = props["valid_from"] - system = "all" if props.get("category") is None else props["category"] - file_key = props["apply"] - if system not in entries: - entries[system] = [] - mode = "append" if props.get("mode") is None else props["mode"] - mode = "reset" if len(entries[system]) == 0 else mode - if mode == "reset": - new = file_key - elif mode == "append": - new = entries[system][-1].file.copy() + file_key - elif mode == "remove": - new = entries[system][-1].file.copy() - for file in file_key: - new.remove(file) - elif mode == "replace": - new = entries[system][-1].file.copy() - if len(file_key) != 2: - msg = f"Invalid number of elements in replace mode: {len(file_key)}" - raise ValueError(msg) - new.remove(file_key[0]) - new += [file_key[1]] - - else: - msg = f"Unknown mode for {timestamp}" - raise ValueError(msg) - - if timestamp in [entry.valid_from for entry in entries[system]]: - msg = ( - f"Duplicate timestamp: {timestamp}, use reset mode 
instead with a single entry" - ) - raise ValueError(msg) - entries[system].append(Catalog.Entry(unix_time(timestamp), new)) - - for system, system_dict in entries.items(): - entries[system] = sorted(system_dict, key=lambda entry: entry.valid_from) - return Catalog(entries) - - def valid_for(self, timestamp, system="all", allow_none=False): - """Get the valid entries for a given timestamp and system""" - if system in self.entries: - valid_from = [entry.valid_from for entry in self.entries[system]] - pos = bisect.bisect_right(valid_from, unix_time(timestamp)) - if pos > 0: - return self.entries[system][pos - 1].file - - if system != "all": - return self.valid_for(timestamp, system="all", allow_none=allow_none) - - if allow_none: - return None - - msg = f"No valid entries found for timestamp: {timestamp}, system: {system}" - raise RuntimeError(msg) - - if system != "all": - return self.valid_for(timestamp, system="all", allow_none=allow_none) - - if allow_none: - return None - - msg = f"No entries found for system: {system}" - raise RuntimeError(msg) - - @staticmethod - def get_files(catalog_file, timestamp, category="all"): - """Helper function to get the files for a given timestamp and category""" - catalog = Catalog.read_from(catalog_file) - return Catalog.valid_for(catalog, timestamp, category) diff --git a/scripts/library/pars_loading.py b/scripts/library/pars_loading.py index 137ae03..80f54a6 100644 --- a/scripts/library/pars_loading.py +++ b/scripts/library/pars_loading.py @@ -5,7 +5,8 @@ from pathlib import Path -from .catalog import Catalog +from dbetto.catalog import Catalog + from .FileKey import ProcessingFileKey # from .patterns import diff --git a/scripts/merge_channels.py b/scripts/merge_channels.py index 6a99062..209708d 100644 --- a/scripts/merge_channels.py +++ b/scripts/merge_channels.py @@ -4,8 +4,8 @@ from pathlib import Path import numpy as np +from dbetto.catalog import Props from legendmeta import LegendMetadata -from legendmeta.catalog import Props from lgdo import lh5 from library.FileKey import ChannelProcKey diff --git a/scripts/par_psp.py b/scripts/par_psp.py index 5ae65c9..d996f3c 100644 --- a/scripts/par_psp.py +++ b/scripts/par_psp.py @@ -7,8 +7,8 @@ import matplotlib.dates as mdates import matplotlib.pyplot as plt import numpy as np +from dbetto.catalog import Props from legendmeta import LegendMetadata -from legendmeta.catalog import Props from library.FileKey import ChannelProcKey mpl.use("Agg") diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm.py index 3a01d1c..7a0ecc9 100644 --- a/scripts/pars_dsp_build_svm.py +++ b/scripts/pars_dsp_build_svm.py @@ -2,8 +2,8 @@ import pickle as pkl from pathlib import Path -from legendmeta import TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props from lgdo import lh5 from library.log import build_log from sklearn.svm import SVC diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms.py index dc38ad3..457bda1 100644 --- a/scripts/pars_dsp_dplms.py +++ b/scripts/pars_dsp_dplms.py @@ -6,8 +6,9 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from lgdo import Array, Table from library.log import build_log from pygama.pargen.dplms_ge_dict import dplms_ge_dict diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt.py index 14f1b05..a957c66 100644 --- 
a/scripts/pars_dsp_eopt.py +++ b/scripts/pars_dsp_eopt.py @@ -8,9 +8,10 @@ import numpy as np import pygama.pargen.energy_optimisation as om # noqa: F401 import sklearn.gaussian_process.kernels as ker +from dbetto import TextDB +from dbetto.catalog import Props from dspeed.units import unit_registry as ureg -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from legendmeta import LegendMetadata from library.log import build_log from pygama.math.distributions import hpge_peak from pygama.pargen.dsp_optimize import ( diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_event_selection.py index 64964c2..177eba6 100644 --- a/scripts/pars_dsp_event_selection.py +++ b/scripts/pars_dsp_event_selection.py @@ -10,8 +10,9 @@ import numpy as np import pygama.math.histogram as pgh import pygama.pargen.energy_cal as pgc -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.log import build_log from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt.py index f1f0f5c..53188ba 100644 --- a/scripts/pars_dsp_nopt.py +++ b/scripts/pars_dsp_nopt.py @@ -6,8 +6,9 @@ import lgdo.lh5 as lh5 import numpy as np import pygama.pargen.noise_optimization as pno -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.log import build_log from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp diff --git a/scripts/pars_dsp_svm.py b/scripts/pars_dsp_svm.py index 359bc3f..67d8a64 100644 --- a/scripts/pars_dsp_svm.py +++ b/scripts/pars_dsp_svm.py @@ -1,7 +1,7 @@ import argparse from pathlib import Path -from legendmeta.catalog import Props +from dbetto.catalog import Props argparser = argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau.py index 1ac3451..9a38526 100644 --- a/scripts/pars_dsp_tau.py +++ b/scripts/pars_dsp_tau.py @@ -4,8 +4,9 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.log import build_log from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp diff --git a/scripts/pars_hit_aoe.py b/scripts/pars_hit_aoe.py index bfc681f..575d3de 100644 --- a/scripts/pars_hit_aoe.py +++ b/scripts/pars_hit_aoe.py @@ -8,8 +8,9 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.convert_np import convert_dict_np_to_float from library.log import build_log from pygama.pargen.AoE_cal import * # noqa: F403 diff --git a/scripts/pars_hit_ecal.py b/scripts/pars_hit_ecal.py index 87a6afd..488463c 100644 --- a/scripts/pars_hit_ecal.py +++ b/scripts/pars_hit_ecal.py @@ -14,8 +14,9 @@ import numpy as np import pygama.math.distributions as pgf import 
pygama.math.histogram as pgh -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.convert_np import convert_dict_np_to_float from library.log import build_log from matplotlib.colors import LogNorm diff --git a/scripts/pars_hit_lq.py b/scripts/pars_hit_lq.py index db721af..4a75a06 100644 --- a/scripts/pars_hit_lq.py +++ b/scripts/pars_hit_lq.py @@ -7,8 +7,9 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.convert_np import convert_dict_np_to_float from library.log import build_log from pygama.math.distributions import gaussian diff --git a/scripts/pars_hit_qc.py b/scripts/pars_hit_qc.py index 97a2720..460e858 100644 --- a/scripts/pars_hit_qc.py +++ b/scripts/pars_hit_qc.py @@ -9,8 +9,9 @@ from pathlib import Path import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from lgdo.lh5 import ls from library.convert_np import convert_dict_np_to_float from library.log import build_log diff --git a/scripts/pars_pht_aoecal.py b/scripts/pars_pht_aoecal.py index bd6d484..f46fb7b 100644 --- a/scripts/pars_pht_aoecal.py +++ b/scripts/pars_pht_aoecal.py @@ -11,8 +11,9 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.FileKey import ChannelProcKey, ProcessingFileKey from library.log import build_log from pygama.pargen.AoE_cal import * # noqa: F403 diff --git a/scripts/pars_pht_fast.py b/scripts/pars_pht_fast.py index 1db32ad..cf90b94 100644 --- a/scripts/pars_pht_fast.py +++ b/scripts/pars_pht_fast.py @@ -8,8 +8,9 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.FileKey import ChannelProcKey, ProcessingFileKey from library.log import build_log from pars_pht_aoecal import run_aoe_calibration diff --git a/scripts/pars_pht_lqcal.py b/scripts/pars_pht_lqcal.py index 9e2f29a..d470480 100644 --- a/scripts/pars_pht_lqcal.py +++ b/scripts/pars_pht_lqcal.py @@ -9,8 +9,9 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.FileKey import ChannelProcKey, ProcessingFileKey from library.log import build_log from pygama.math.distributions import gaussian diff --git a/scripts/pars_pht_partcal.py b/scripts/pars_pht_partcal.py index 4915494..b726b96 100644 --- a/scripts/pars_pht_partcal.py +++ b/scripts/pars_pht_partcal.py @@ -11,8 +11,9 @@ import pandas as pd import pygama.math.distributions as pgf import pygama.math.histogram as pgh -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.FileKey import ChannelProcKey, ProcessingFileKey from 
library.log import build_log from pygama.math.distributions import nb_poly diff --git a/scripts/pars_pht_qc.py b/scripts/pars_pht_qc.py index 02afade..e3fbd12 100644 --- a/scripts/pars_pht_qc.py +++ b/scripts/pars_pht_qc.py @@ -9,8 +9,9 @@ from pathlib import Path import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from lgdo.lh5 import ls from library.convert_np import convert_dict_np_to_float from library.log import build_log diff --git a/scripts/pars_pht_qc_phy.py b/scripts/pars_pht_qc_phy.py index 86328dc..c235064 100644 --- a/scripts/pars_pht_qc_phy.py +++ b/scripts/pars_pht_qc_phy.py @@ -10,8 +10,9 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from lgdo.lh5 import ls from library.convert_np import convert_dict_np_to_float from library.log import build_log diff --git a/scripts/pars_tcm_pulser.py b/scripts/pars_tcm_pulser.py index f230ad0..b7618d1 100644 --- a/scripts/pars_tcm_pulser.py +++ b/scripts/pars_tcm_pulser.py @@ -4,8 +4,9 @@ import lgdo.lh5 as lh5 import numpy as np -from legendmeta import LegendMetadata, TextDB -from legendmeta.catalog import Props +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata from library.log import build_log from pygama.pargen.data_cleaning import get_tcm_pulser_ids From 85a2d9dd7cd1e7a2e0cb64d3f106265b5f6a557a Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 28 Jan 2025 12:33:43 +0100 Subject: [PATCH 050/101] add threshold extraction --- scripts/check_blinding.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/check_blinding.py b/scripts/check_blinding.py index f5dd378..a81a1a3 100644 --- a/scripts/check_blinding.py +++ b/scripts/check_blinding.py @@ -87,7 +87,10 @@ # will always pass this check if (np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5)) or det_status is False: Path(args.output).mkdir(parents=True, exist_ok=True) - Props.write_to(args.output, {}) + Props.write_to( + args.output, + {"threshold_adc": np.nanmin(daqenergy), "threshold_kev": np.nanmin(daqenergy_cal)}, + ) else: msg = "peaks not found in daqenergy" raise RuntimeError(msg) From ef8996691fecf3d7b7e7d4ca282687b1258f5de7 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 28 Jan 2025 12:45:02 +0100 Subject: [PATCH 051/101] new structure --- config-lngs.yaml | 71 +++++++++++++++ config-nersc.yaml | 69 ++++++++++++++ templates/config-nersc.json | 87 ------------------ templates/config.json | 89 ------------------- Snakefile => workflow/Snakefile | 1 + .../Snakefile-build-raw | 0 .../profiles}/build-raw/config.yaml | 0 .../profiles}/default/config.yaml | 0 .../profiles}/legend-data/config.yaml | 0 {rules => workflow/rules}/ann.smk | 0 .../rules}/blinding_calibration.smk | 0 {rules => workflow/rules}/blinding_check.smk | 0 {rules => workflow/rules}/chanlist_gen.smk | 0 {rules => workflow/rules}/common.smk | 0 {rules => workflow/rules}/dsp.smk | 0 {rules => workflow/rules}/evt.smk | 0 {rules => workflow/rules}/filelist_gen.smk | 0 {rules => workflow/rules}/hit.smk | 0 {rules => workflow/rules}/main.smk | 0 {rules => workflow/rules}/pht.smk | 0 {rules => workflow/rules}/pht_fast.smk | 0 {rules => workflow/rules}/psp.smk | 0 {rules => workflow/rules}/qc_phy.smk 
| 0 {rules => workflow/rules}/raw.smk | 0 {rules => workflow/rules}/skm.smk | 0 {rules => workflow/rules}/tcm.smk | 0 {scripts => workflow/scripts}/__init__.py | 0 .../scripts}/blinding_calibration.py | 0 {scripts => workflow/scripts}/build_dsp.py | 0 {scripts => workflow/scripts}/build_evt.py | 0 {scripts => workflow/scripts}/build_fdb.py | 0 {scripts => workflow/scripts}/build_hit.py | 0 .../scripts}/build_raw_blind.py | 0 .../scripts}/build_raw_fcio.py | 0 .../scripts}/build_raw_orca.py | 0 {scripts => workflow/scripts}/build_skm.py | 0 {scripts => workflow/scripts}/build_tcm.py | 0 .../scripts}/check_blinding.py | 0 {scripts => workflow/scripts}/complete_run.py | 0 .../scripts}/create_chankeylist.py | 0 .../scripts}/library/FileKey.py | 0 .../scripts}/library/__init__.py | 0 .../scripts}/library/cal_grouping.py | 0 .../scripts}/library/convert_np.py | 0 .../scripts}/library/create_pars_keylist.py | 0 {scripts => workflow/scripts}/library/log.py | 0 .../scripts}/library/pars_loading.py | 0 .../scripts}/library/patterns.py | 0 .../scripts}/library/utils.py | 0 .../scripts}/merge_channels.py | 0 {scripts => workflow/scripts}/par_psp.py | 0 .../scripts}/pars_dsp_build_svm.py | 0 .../scripts}/pars_dsp_dplms.py | 0 .../scripts}/pars_dsp_eopt.py | 0 .../scripts}/pars_dsp_event_selection.py | 0 .../scripts}/pars_dsp_nopt.py | 0 {scripts => workflow/scripts}/pars_dsp_svm.py | 0 {scripts => workflow/scripts}/pars_dsp_tau.py | 0 {scripts => workflow/scripts}/pars_hit_aoe.py | 0 .../scripts}/pars_hit_ecal.py | 0 {scripts => workflow/scripts}/pars_hit_lq.py | 0 {scripts => workflow/scripts}/pars_hit_qc.py | 0 .../scripts}/pars_pht_aoecal.py | 0 .../scripts}/pars_pht_fast.py | 0 .../scripts}/pars_pht_lqcal.py | 0 .../scripts}/pars_pht_partcal.py | 0 {scripts => workflow/scripts}/pars_pht_qc.py | 0 .../scripts}/pars_pht_qc_phy.py | 0 .../scripts}/pars_tcm_pulser.py | 0 .../scripts}/write_filelist.py | 0 70 files changed, 141 insertions(+), 176 deletions(-) create mode 100644 config-lngs.yaml create mode 100644 config-nersc.yaml delete mode 100644 templates/config-nersc.json delete mode 100644 templates/config.json rename Snakefile => workflow/Snakefile (99%) rename Snakefile-build-raw => workflow/Snakefile-build-raw (100%) rename {profiles => workflow/profiles}/build-raw/config.yaml (100%) rename {profiles => workflow/profiles}/default/config.yaml (100%) rename {profiles => workflow/profiles}/legend-data/config.yaml (100%) rename {rules => workflow/rules}/ann.smk (100%) rename {rules => workflow/rules}/blinding_calibration.smk (100%) rename {rules => workflow/rules}/blinding_check.smk (100%) rename {rules => workflow/rules}/chanlist_gen.smk (100%) rename {rules => workflow/rules}/common.smk (100%) rename {rules => workflow/rules}/dsp.smk (100%) rename {rules => workflow/rules}/evt.smk (100%) rename {rules => workflow/rules}/filelist_gen.smk (100%) rename {rules => workflow/rules}/hit.smk (100%) rename {rules => workflow/rules}/main.smk (100%) rename {rules => workflow/rules}/pht.smk (100%) rename {rules => workflow/rules}/pht_fast.smk (100%) rename {rules => workflow/rules}/psp.smk (100%) rename {rules => workflow/rules}/qc_phy.smk (100%) rename {rules => workflow/rules}/raw.smk (100%) rename {rules => workflow/rules}/skm.smk (100%) rename {rules => workflow/rules}/tcm.smk (100%) rename {scripts => workflow/scripts}/__init__.py (100%) rename {scripts => workflow/scripts}/blinding_calibration.py (100%) rename {scripts => workflow/scripts}/build_dsp.py (100%) rename {scripts => 
workflow/scripts}/build_evt.py (100%) rename {scripts => workflow/scripts}/build_fdb.py (100%) rename {scripts => workflow/scripts}/build_hit.py (100%) rename {scripts => workflow/scripts}/build_raw_blind.py (100%) rename {scripts => workflow/scripts}/build_raw_fcio.py (100%) rename {scripts => workflow/scripts}/build_raw_orca.py (100%) rename {scripts => workflow/scripts}/build_skm.py (100%) rename {scripts => workflow/scripts}/build_tcm.py (100%) rename {scripts => workflow/scripts}/check_blinding.py (100%) rename {scripts => workflow/scripts}/complete_run.py (100%) rename {scripts => workflow/scripts}/create_chankeylist.py (100%) rename {scripts => workflow/scripts}/library/FileKey.py (100%) rename {scripts => workflow/scripts}/library/__init__.py (100%) rename {scripts => workflow/scripts}/library/cal_grouping.py (100%) rename {scripts => workflow/scripts}/library/convert_np.py (100%) rename {scripts => workflow/scripts}/library/create_pars_keylist.py (100%) rename {scripts => workflow/scripts}/library/log.py (100%) rename {scripts => workflow/scripts}/library/pars_loading.py (100%) rename {scripts => workflow/scripts}/library/patterns.py (100%) rename {scripts => workflow/scripts}/library/utils.py (100%) rename {scripts => workflow/scripts}/merge_channels.py (100%) rename {scripts => workflow/scripts}/par_psp.py (100%) rename {scripts => workflow/scripts}/pars_dsp_build_svm.py (100%) rename {scripts => workflow/scripts}/pars_dsp_dplms.py (100%) rename {scripts => workflow/scripts}/pars_dsp_eopt.py (100%) rename {scripts => workflow/scripts}/pars_dsp_event_selection.py (100%) rename {scripts => workflow/scripts}/pars_dsp_nopt.py (100%) rename {scripts => workflow/scripts}/pars_dsp_svm.py (100%) rename {scripts => workflow/scripts}/pars_dsp_tau.py (100%) rename {scripts => workflow/scripts}/pars_hit_aoe.py (100%) rename {scripts => workflow/scripts}/pars_hit_ecal.py (100%) rename {scripts => workflow/scripts}/pars_hit_lq.py (100%) rename {scripts => workflow/scripts}/pars_hit_qc.py (100%) rename {scripts => workflow/scripts}/pars_pht_aoecal.py (100%) rename {scripts => workflow/scripts}/pars_pht_fast.py (100%) rename {scripts => workflow/scripts}/pars_pht_lqcal.py (100%) rename {scripts => workflow/scripts}/pars_pht_partcal.py (100%) rename {scripts => workflow/scripts}/pars_pht_qc.py (100%) rename {scripts => workflow/scripts}/pars_pht_qc_phy.py (100%) rename {scripts => workflow/scripts}/pars_tcm_pulser.py (100%) rename {scripts => workflow/scripts}/write_filelist.py (100%) diff --git a/config-lngs.yaml b/config-lngs.yaml new file mode 100644 index 0000000..901cac8 --- /dev/null +++ b/config-lngs.yaml @@ -0,0 +1,71 @@ +setups: + l200: + paths: + sandbox_path: '' + tier_daq: $_/generated/tier/daq + tier_raw_blind: '' + workflow: $_/workflow + metadata: $_/inputs + config: $_/inputs/dataprod/config + par_overwrite: $_/inputs/dataprod/overrides + chan_map: $_/inputs/hardware/configuration + detector_db: $_/inputs/hardware/detectors + tier: $_/generated/tier + tier_raw: $_/generated/tier/raw + tier_tcm: $_/generated/tier/tcm + tier_dsp: $_/generated/tier/dsp + tier_hit: $_/generated/tier/hit + tier_ann: $_/generated/tier/ann + tier_evt: $_/generated/tier/evt + tier_psp: $_/generated/tier/psp + tier_pht: $_/generated/tier/pht + tier_pan: $_/generated/tier/pan + tier_pet: $_/generated/tier/pet + tier_skm: $_/generated/tier/skm + par: $_/generated/par + par_raw: $_/generated/par/raw + par_tcm: $_/generated/par/tcm + par_dsp: $_/generated/par/dsp + par_hit: $_/generated/par/hit + par_evt: 
$_/generated/par/evt + par_psp: $_/generated/par/psp + par_pht: $_/generated/par/pht + par_pet: $_/generated/par/pet + plt: $_/generated/plt + log: $_/generated/log + tmp_plt: $_/generated/tmp/plt + tmp_log: $_/generated/tmp/log + tmp_filelists: $_/generated/tmp/filelists + tmp_par: $_/generated/tmp/par + src: $_/software/python/src + install: $_/software/python/install + cache: $_/software/python/cache + table_format: + raw: ch{ch:07d}/raw + dsp: ch{ch:07d}/dsp + psp: ch{ch:07d}/dsp + hit: ch{ch:07d}/hit + pht: ch{ch:07d}/hit + evt: '{grp}/evt' + pet: '{grp}/evt' + skm: '{grp}/skm' + tcm: hardware_tcm_1 + execenv: + cmd: apptainer run + arg: /data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif + env: + HDF5_USE_FILE_LOCKING: 'False' + LGDO_BOUNDSCHECK: 'false' + DSPEED_BOUNDSCHECK: 'false' + PYGAMA_PARALLEL: 'false' + PYGAMA_FASTMATH: 'false' + pkg_versions: + pygama: pygama==2.0.3 + pylegendmeta: pylegendmeta==1.1.0 + dspeed: dspeed==1.6.1 + legend-pydataobj: legend-pydataobj==1.9.0 + legend-daq2lh5: legend-daq2lh5==1.2.2 + tensorflow: tensorflow==2.17 + keras: keras==3.6.0 + jax: jax==0.4.30 + meta_version: v0.5.7 diff --git a/config-nersc.yaml b/config-nersc.yaml new file mode 100644 index 0000000..88b5156 --- /dev/null +++ b/config-nersc.yaml @@ -0,0 +1,69 @@ +setups: + l200: + paths: + sandbox_path: '' + tier_daq: $_/generated/tier/daq + tier_raw_blind: '' + workflow: $_/workflow + metadata: $_/inputs + config: $_/inputs/dataprod/config + par_overwrite: $_/inputs/dataprod/overrides + chan_map: $_/inputs/hardware/configuration + detector_db: $_/inputs/hardware/detectors + tier: $_/generated/tier + tier_raw: /dvs_ro/cfs/cdirs/m2676/users/pertoldi/legend-prodenv/prod-blind/ref-raw/generated/tier/raw + tier_tcm: /dvs_ro/cfs/cdirs/m2676/users/pertoldi/legend-prodenv/prod-blind/ref-v2.0.0/generated/tier/tcm + tier_dsp: $_/generated/tier/dsp + tier_hit: $_/generated/tier/hit + tier_evt: $_/generated/tier/evt + tier_psp: $_/generated/tier/psp + tier_pht: $_/generated/tier/pht + tier_pet: $_/generated/tier/pet + tier_skm: $_/generated/tier/skm + par: $_/generated/par + par_raw: $_/generated/par/raw + par_tcm: $_/generated/par/tcm + par_dsp: $_/generated/par/dsp + par_hit: $_/generated/par/hit + par_evt: $_/generated/par/evt + par_psp: $_/generated/par/psp + par_pht: $_/generated/par/pht + par_pet: $_/generated/par/pet + plt: $_/generated/plt + log: $_/generated/log + tmp_plt: $_/generated/tmp/plt + tmp_log: $_/generated/tmp/log + tmp_filelists: $_/generated/tmp/filelists + tmp_par: $_/generated/tmp/par + src: $_/software/python/src + install: $_/software/python/install + cache: $_/software/python/cache + table_format: + raw: ch{ch:07d}/raw + dsp: ch{ch:07d}/dsp + psp: ch{ch:07d}/dsp + hit: ch{ch:07d}/hit + pht: ch{ch:07d}/hit + evt: '{grp}/evt' + pet: '{grp}/evt' + skm: '{grp}/skm' + tcm: hardware_tcm_1 + execenv: + cmd: shifter + arg: ' --image legendexp/legend-base:latest' + env: + HDF5_USE_FILE_LOCKING: 'FALSE' + LGDO_BOUNDSCHECK: 'false' + DSPEED_BOUNDSCHECK: 'false' + PYGAMA_PARALLEL: 'false' + PYGAMA_FASTMATH: 'false' + pkg_versions: + pygama: pygama==2.0.3 + pylegendmeta: pylegendmeta==0.10.2 + dspeed: dspeed==1.6.1 + legend-pydataobj: legend-pydataobj==1.10.0 + legend-daq2lh5: legend-daq2lh5==1.2.1 + tensorflow: tensorflow==2.17 + keras: keras==3.6.0 + jax: jax==0.4.30 + meta_version: v0.5.7 diff --git a/templates/config-nersc.json b/templates/config-nersc.json deleted file mode 100644 index 9df4fe7..0000000 --- a/templates/config-nersc.json +++ /dev/null 
@@ -1,87 +0,0 @@ -{ - "setups": { - "l200": { - "paths": { - "sandbox_path": "", - "tier_daq": "$_/generated/tier/daq", - "tier_raw_blind": "", - - "workflow": "$_/workflow", - - "metadata": "$_/inputs", - "config": "$_/inputs/dataprod/config", - "par_overwrite": "$_/inputs/dataprod/overrides", - "chan_map": "$_/inputs/hardware/configuration", - "detector_db": "$_/inputs/hardware/detectors", - - "tier": "$_/generated/tier", - "tier_raw": "/dvs_ro/cfs/cdirs/m2676/users/pertoldi/legend-prodenv/prod-blind/ref-raw/generated/tier/raw", - "tier_tcm": "/dvs_ro/cfs/cdirs/m2676/users/pertoldi/legend-prodenv/prod-blind/ref-v2.0.0/generated/tier/tcm", - "tier_dsp": "$_/generated/tier/dsp", - "tier_hit": "$_/generated/tier/hit", - "tier_evt": "$_/generated/tier/evt", - "tier_psp": "$_/generated/tier/psp", - "tier_pht": "$_/generated/tier/pht", - "tier_pet": "$_/generated/tier/pet", - "tier_skm": "$_/generated/tier/skm", - - "par": "$_/generated/par", - "par_raw": "$_/generated/par/raw", - "par_tcm": "$_/generated/par/tcm", - "par_dsp": "$_/generated/par/dsp", - "par_hit": "$_/generated/par/hit", - "par_evt": "$_/generated/par/evt", - "par_psp": "$_/generated/par/psp", - "par_pht": "$_/generated/par/pht", - "par_pet": "$_/generated/par/pet", - - "plt": "$_/generated/plt", - "log": "$_/generated/log", - - "tmp_plt": "$_/generated/tmp/plt", - "tmp_log": "$_/generated/tmp/log", - "tmp_filelists": "$_/generated/tmp/filelists", - "tmp_par": "$_/generated/tmp/par", - - "src": "$_/software/python/src", - "install": "$_/software/python/install", - "cache": "$_/software/python/cache" - }, - - "table_format": { - "raw": "ch{ch:07d}/raw", - "dsp": "ch{ch:07d}/dsp", - "psp": "ch{ch:07d}/dsp", - "hit": "ch{ch:07d}/hit", - "pht": "ch{ch:07d}/hit", - "evt": "{grp}/evt", - "pet": "{grp}/evt", - "skm": "{grp}/skm", - "tcm": "hardware_tcm_1" - }, - - "execenv": { - "cmd": "shifter", - "arg": " --image legendexp/legend-base:latest", - "env": { - "HDF5_USE_FILE_LOCKING": "FALSE", - "LGDO_BOUNDSCHECK": "false", - "DSPEED_BOUNDSCHECK": "false", - "PYGAMA_PARALLEL": "false", - "PYGAMA_FASTMATH": "false" - } - }, - "pkg_versions": { - "pygama": "pygama==2.0.3", - "pylegendmeta": "pylegendmeta==0.10.2", - "dbetto": "dbetto==1.0.6", - "dspeed": "dspeed==1.6.1", - "legend-pydataobj": "legend-pydataobj==1.10.0", - "legend-daq2lh5": "legend-daq2lh5==1.2.1", - "tensorflow": "tensorflow==2.17", - "keras": "keras==3.6.0", - "jax": "jax==0.4.30" - } - } - } -} diff --git a/templates/config.json b/templates/config.json deleted file mode 100644 index 17f4bbf..0000000 --- a/templates/config.json +++ /dev/null @@ -1,89 +0,0 @@ -{ - "setups": { - "l200": { - "paths": { - "sandbox_path": "", - "tier_daq": "$_/generated/tier/daq", - "tier_raw_blind": "", - - "workflow": "$_/workflow", - - "metadata": "$_/inputs", - "config": "$_/inputs/dataprod/config", - "par_overwrite": "$_/inputs/dataprod/overrides", - "chan_map": "$_/inputs/hardware/configuration", - "detector_db": "$_/inputs/hardware/detectors", - - "tier": "$_/generated/tier", - "tier_raw": "$_/generated/tier/raw", - "tier_tcm": "$_/generated/tier/tcm", - "tier_dsp": "$_/generated/tier/dsp", - "tier_hit": "$_/generated/tier/hit", - "tier_ann": "$_/generated/tier/ann", - "tier_evt": "$_/generated/tier/evt", - "tier_psp": "$_/generated/tier/psp", - "tier_pht": "$_/generated/tier/pht", - "tier_pan": "$_/generated/tier/pan", - "tier_pet": "$_/generated/tier/pet", - "tier_skm": "$_/generated/tier/skm", - - "par": "$_/generated/par", - "par_raw": "$_/generated/par/raw", - "par_tcm": 
"$_/generated/par/tcm", - "par_dsp": "$_/generated/par/dsp", - "par_hit": "$_/generated/par/hit", - "par_evt": "$_/generated/par/evt", - "par_psp": "$_/generated/par/psp", - "par_pht": "$_/generated/par/pht", - "par_pet": "$_/generated/par/pet", - - "plt": "$_/generated/plt", - "log": "$_/generated/log", - - "tmp_plt": "$_/generated/tmp/plt", - "tmp_log": "$_/generated/tmp/log", - "tmp_filelists": "$_/generated/tmp/filelists", - "tmp_par": "$_/generated/tmp/par", - - "src": "$_/software/python/src", - "install": "$_/software/python/install", - "cache": "$_/software/python/cache" - }, - - "table_format": { - "raw": "ch{ch:07d}/raw", - "dsp": "ch{ch:07d}/dsp", - "psp": "ch{ch:07d}/dsp", - "hit": "ch{ch:07d}/hit", - "pht": "ch{ch:07d}/hit", - "evt": "{grp}/evt", - "pet": "{grp}/evt", - "skm": "{grp}/skm", - "tcm": "hardware_tcm_1" - }, - - "execenv": { - "cmd": "apptainer run", - "arg": "/data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif", - "env": { - "HDF5_USE_FILE_LOCKING": "False", - "LGDO_BOUNDSCHECK": "false", - "DSPEED_BOUNDSCHECK": "false", - "PYGAMA_PARALLEL": "false", - "PYGAMA_FASTMATH": "false" - } - }, - "pkg_versions": { - "pygama": "pygama==2.0.3", - "pylegendmeta": "pylegendmeta==1.1.0", - "dbetto": "dbetto==1.0.6", - "dspeed": "dspeed==1.6.1", - "legend-pydataobj": "legend-pydataobj==1.9.0", - "legend-daq2lh5": "legend-daq2lh5==1.2.2", - "tensorflow": "tensorflow==2.17", - "keras": "keras==3.6.0", - "jax": "jax==0.4.30" - } - } - } -} diff --git a/Snakefile b/workflow/Snakefile similarity index 99% rename from Snakefile rename to workflow/Snakefile index 10a6855..3e25153 100644 --- a/Snakefile +++ b/workflow/Snakefile @@ -17,6 +17,7 @@ import glob from datetime import datetime from collections import OrderedDict import logging +from pylegendmeta import LegendMetadata import scripts.library as lib from scripts.library.pars_loading import ParsCatalog diff --git a/Snakefile-build-raw b/workflow/Snakefile-build-raw similarity index 100% rename from Snakefile-build-raw rename to workflow/Snakefile-build-raw diff --git a/profiles/build-raw/config.yaml b/workflow/profiles/build-raw/config.yaml similarity index 100% rename from profiles/build-raw/config.yaml rename to workflow/profiles/build-raw/config.yaml diff --git a/profiles/default/config.yaml b/workflow/profiles/default/config.yaml similarity index 100% rename from profiles/default/config.yaml rename to workflow/profiles/default/config.yaml diff --git a/profiles/legend-data/config.yaml b/workflow/profiles/legend-data/config.yaml similarity index 100% rename from profiles/legend-data/config.yaml rename to workflow/profiles/legend-data/config.yaml diff --git a/rules/ann.smk b/workflow/rules/ann.smk similarity index 100% rename from rules/ann.smk rename to workflow/rules/ann.smk diff --git a/rules/blinding_calibration.smk b/workflow/rules/blinding_calibration.smk similarity index 100% rename from rules/blinding_calibration.smk rename to workflow/rules/blinding_calibration.smk diff --git a/rules/blinding_check.smk b/workflow/rules/blinding_check.smk similarity index 100% rename from rules/blinding_check.smk rename to workflow/rules/blinding_check.smk diff --git a/rules/chanlist_gen.smk b/workflow/rules/chanlist_gen.smk similarity index 100% rename from rules/chanlist_gen.smk rename to workflow/rules/chanlist_gen.smk diff --git a/rules/common.smk b/workflow/rules/common.smk similarity index 100% rename from rules/common.smk rename to workflow/rules/common.smk diff --git a/rules/dsp.smk 
b/workflow/rules/dsp.smk similarity index 100% rename from rules/dsp.smk rename to workflow/rules/dsp.smk diff --git a/rules/evt.smk b/workflow/rules/evt.smk similarity index 100% rename from rules/evt.smk rename to workflow/rules/evt.smk diff --git a/rules/filelist_gen.smk b/workflow/rules/filelist_gen.smk similarity index 100% rename from rules/filelist_gen.smk rename to workflow/rules/filelist_gen.smk diff --git a/rules/hit.smk b/workflow/rules/hit.smk similarity index 100% rename from rules/hit.smk rename to workflow/rules/hit.smk diff --git a/rules/main.smk b/workflow/rules/main.smk similarity index 100% rename from rules/main.smk rename to workflow/rules/main.smk diff --git a/rules/pht.smk b/workflow/rules/pht.smk similarity index 100% rename from rules/pht.smk rename to workflow/rules/pht.smk diff --git a/rules/pht_fast.smk b/workflow/rules/pht_fast.smk similarity index 100% rename from rules/pht_fast.smk rename to workflow/rules/pht_fast.smk diff --git a/rules/psp.smk b/workflow/rules/psp.smk similarity index 100% rename from rules/psp.smk rename to workflow/rules/psp.smk diff --git a/rules/qc_phy.smk b/workflow/rules/qc_phy.smk similarity index 100% rename from rules/qc_phy.smk rename to workflow/rules/qc_phy.smk diff --git a/rules/raw.smk b/workflow/rules/raw.smk similarity index 100% rename from rules/raw.smk rename to workflow/rules/raw.smk diff --git a/rules/skm.smk b/workflow/rules/skm.smk similarity index 100% rename from rules/skm.smk rename to workflow/rules/skm.smk diff --git a/rules/tcm.smk b/workflow/rules/tcm.smk similarity index 100% rename from rules/tcm.smk rename to workflow/rules/tcm.smk diff --git a/scripts/__init__.py b/workflow/scripts/__init__.py similarity index 100% rename from scripts/__init__.py rename to workflow/scripts/__init__.py diff --git a/scripts/blinding_calibration.py b/workflow/scripts/blinding_calibration.py similarity index 100% rename from scripts/blinding_calibration.py rename to workflow/scripts/blinding_calibration.py diff --git a/scripts/build_dsp.py b/workflow/scripts/build_dsp.py similarity index 100% rename from scripts/build_dsp.py rename to workflow/scripts/build_dsp.py diff --git a/scripts/build_evt.py b/workflow/scripts/build_evt.py similarity index 100% rename from scripts/build_evt.py rename to workflow/scripts/build_evt.py diff --git a/scripts/build_fdb.py b/workflow/scripts/build_fdb.py similarity index 100% rename from scripts/build_fdb.py rename to workflow/scripts/build_fdb.py diff --git a/scripts/build_hit.py b/workflow/scripts/build_hit.py similarity index 100% rename from scripts/build_hit.py rename to workflow/scripts/build_hit.py diff --git a/scripts/build_raw_blind.py b/workflow/scripts/build_raw_blind.py similarity index 100% rename from scripts/build_raw_blind.py rename to workflow/scripts/build_raw_blind.py diff --git a/scripts/build_raw_fcio.py b/workflow/scripts/build_raw_fcio.py similarity index 100% rename from scripts/build_raw_fcio.py rename to workflow/scripts/build_raw_fcio.py diff --git a/scripts/build_raw_orca.py b/workflow/scripts/build_raw_orca.py similarity index 100% rename from scripts/build_raw_orca.py rename to workflow/scripts/build_raw_orca.py diff --git a/scripts/build_skm.py b/workflow/scripts/build_skm.py similarity index 100% rename from scripts/build_skm.py rename to workflow/scripts/build_skm.py diff --git a/scripts/build_tcm.py b/workflow/scripts/build_tcm.py similarity index 100% rename from scripts/build_tcm.py rename to workflow/scripts/build_tcm.py diff --git a/scripts/check_blinding.py 
b/workflow/scripts/check_blinding.py similarity index 100% rename from scripts/check_blinding.py rename to workflow/scripts/check_blinding.py diff --git a/scripts/complete_run.py b/workflow/scripts/complete_run.py similarity index 100% rename from scripts/complete_run.py rename to workflow/scripts/complete_run.py diff --git a/scripts/create_chankeylist.py b/workflow/scripts/create_chankeylist.py similarity index 100% rename from scripts/create_chankeylist.py rename to workflow/scripts/create_chankeylist.py diff --git a/scripts/library/FileKey.py b/workflow/scripts/library/FileKey.py similarity index 100% rename from scripts/library/FileKey.py rename to workflow/scripts/library/FileKey.py diff --git a/scripts/library/__init__.py b/workflow/scripts/library/__init__.py similarity index 100% rename from scripts/library/__init__.py rename to workflow/scripts/library/__init__.py diff --git a/scripts/library/cal_grouping.py b/workflow/scripts/library/cal_grouping.py similarity index 100% rename from scripts/library/cal_grouping.py rename to workflow/scripts/library/cal_grouping.py diff --git a/scripts/library/convert_np.py b/workflow/scripts/library/convert_np.py similarity index 100% rename from scripts/library/convert_np.py rename to workflow/scripts/library/convert_np.py diff --git a/scripts/library/create_pars_keylist.py b/workflow/scripts/library/create_pars_keylist.py similarity index 100% rename from scripts/library/create_pars_keylist.py rename to workflow/scripts/library/create_pars_keylist.py diff --git a/scripts/library/log.py b/workflow/scripts/library/log.py similarity index 100% rename from scripts/library/log.py rename to workflow/scripts/library/log.py diff --git a/scripts/library/pars_loading.py b/workflow/scripts/library/pars_loading.py similarity index 100% rename from scripts/library/pars_loading.py rename to workflow/scripts/library/pars_loading.py diff --git a/scripts/library/patterns.py b/workflow/scripts/library/patterns.py similarity index 100% rename from scripts/library/patterns.py rename to workflow/scripts/library/patterns.py diff --git a/scripts/library/utils.py b/workflow/scripts/library/utils.py similarity index 100% rename from scripts/library/utils.py rename to workflow/scripts/library/utils.py diff --git a/scripts/merge_channels.py b/workflow/scripts/merge_channels.py similarity index 100% rename from scripts/merge_channels.py rename to workflow/scripts/merge_channels.py diff --git a/scripts/par_psp.py b/workflow/scripts/par_psp.py similarity index 100% rename from scripts/par_psp.py rename to workflow/scripts/par_psp.py diff --git a/scripts/pars_dsp_build_svm.py b/workflow/scripts/pars_dsp_build_svm.py similarity index 100% rename from scripts/pars_dsp_build_svm.py rename to workflow/scripts/pars_dsp_build_svm.py diff --git a/scripts/pars_dsp_dplms.py b/workflow/scripts/pars_dsp_dplms.py similarity index 100% rename from scripts/pars_dsp_dplms.py rename to workflow/scripts/pars_dsp_dplms.py diff --git a/scripts/pars_dsp_eopt.py b/workflow/scripts/pars_dsp_eopt.py similarity index 100% rename from scripts/pars_dsp_eopt.py rename to workflow/scripts/pars_dsp_eopt.py diff --git a/scripts/pars_dsp_event_selection.py b/workflow/scripts/pars_dsp_event_selection.py similarity index 100% rename from scripts/pars_dsp_event_selection.py rename to workflow/scripts/pars_dsp_event_selection.py diff --git a/scripts/pars_dsp_nopt.py b/workflow/scripts/pars_dsp_nopt.py similarity index 100% rename from scripts/pars_dsp_nopt.py rename to workflow/scripts/pars_dsp_nopt.py diff 
--git a/scripts/pars_dsp_svm.py b/workflow/scripts/pars_dsp_svm.py similarity index 100% rename from scripts/pars_dsp_svm.py rename to workflow/scripts/pars_dsp_svm.py diff --git a/scripts/pars_dsp_tau.py b/workflow/scripts/pars_dsp_tau.py similarity index 100% rename from scripts/pars_dsp_tau.py rename to workflow/scripts/pars_dsp_tau.py diff --git a/scripts/pars_hit_aoe.py b/workflow/scripts/pars_hit_aoe.py similarity index 100% rename from scripts/pars_hit_aoe.py rename to workflow/scripts/pars_hit_aoe.py diff --git a/scripts/pars_hit_ecal.py b/workflow/scripts/pars_hit_ecal.py similarity index 100% rename from scripts/pars_hit_ecal.py rename to workflow/scripts/pars_hit_ecal.py diff --git a/scripts/pars_hit_lq.py b/workflow/scripts/pars_hit_lq.py similarity index 100% rename from scripts/pars_hit_lq.py rename to workflow/scripts/pars_hit_lq.py diff --git a/scripts/pars_hit_qc.py b/workflow/scripts/pars_hit_qc.py similarity index 100% rename from scripts/pars_hit_qc.py rename to workflow/scripts/pars_hit_qc.py diff --git a/scripts/pars_pht_aoecal.py b/workflow/scripts/pars_pht_aoecal.py similarity index 100% rename from scripts/pars_pht_aoecal.py rename to workflow/scripts/pars_pht_aoecal.py diff --git a/scripts/pars_pht_fast.py b/workflow/scripts/pars_pht_fast.py similarity index 100% rename from scripts/pars_pht_fast.py rename to workflow/scripts/pars_pht_fast.py diff --git a/scripts/pars_pht_lqcal.py b/workflow/scripts/pars_pht_lqcal.py similarity index 100% rename from scripts/pars_pht_lqcal.py rename to workflow/scripts/pars_pht_lqcal.py diff --git a/scripts/pars_pht_partcal.py b/workflow/scripts/pars_pht_partcal.py similarity index 100% rename from scripts/pars_pht_partcal.py rename to workflow/scripts/pars_pht_partcal.py diff --git a/scripts/pars_pht_qc.py b/workflow/scripts/pars_pht_qc.py similarity index 100% rename from scripts/pars_pht_qc.py rename to workflow/scripts/pars_pht_qc.py diff --git a/scripts/pars_pht_qc_phy.py b/workflow/scripts/pars_pht_qc_phy.py similarity index 100% rename from scripts/pars_pht_qc_phy.py rename to workflow/scripts/pars_pht_qc_phy.py diff --git a/scripts/pars_tcm_pulser.py b/workflow/scripts/pars_tcm_pulser.py similarity index 100% rename from scripts/pars_tcm_pulser.py rename to workflow/scripts/pars_tcm_pulser.py diff --git a/scripts/write_filelist.py b/workflow/scripts/write_filelist.py similarity index 100% rename from scripts/write_filelist.py rename to workflow/scripts/write_filelist.py From 054041d5d6af4e80431e61ab6c2cde639f26a0fd Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 28 Jan 2025 14:34:12 +0100 Subject: [PATCH 052/101] move dsp pars rules to dedicated file, scope rules with _geds --- Snakefile | 2 + rules/dsp.smk | 251 +---------------- rules/dsp_pars_geds.smk | 252 ++++++++++++++++++ rules/psp.smk | 203 +------------- rules/psp_pars_geds.smk | 209 +++++++++++++++ scripts/{par_psp.py => par_psp_geds.py} | 0 ...uild_svm.py => pars_dsp_build_svm_geds.py} | 0 ...rs_dsp_dplms.py => pars_dsp_dplms_geds.py} | 0 ...pars_dsp_eopt.py => pars_dsp_eopt_geds.py} | 0 ...t_selection.py => pars_dsp_evtsel_geds.py} | 0 ...pars_dsp_nopt.py => pars_dsp_nopt_geds.py} | 0 .../{pars_dsp_svm.py => pars_dsp_svm_geds.py} | 0 .../{pars_dsp_tau.py => pars_dsp_tau_geds.py} | 0 13 files changed, 465 insertions(+), 452 deletions(-) create mode 100644 rules/dsp_pars_geds.smk create mode 100644 rules/psp_pars_geds.smk rename scripts/{par_psp.py => par_psp_geds.py} (100%) rename scripts/{pars_dsp_build_svm.py => pars_dsp_build_svm_geds.py} (100%) 
rename scripts/{pars_dsp_dplms.py => pars_dsp_dplms_geds.py} (100%) rename scripts/{pars_dsp_eopt.py => pars_dsp_eopt_geds.py} (100%) rename scripts/{pars_dsp_event_selection.py => pars_dsp_evtsel_geds.py} (100%) rename scripts/{pars_dsp_nopt.py => pars_dsp_nopt_geds.py} (100%) rename scripts/{pars_dsp_svm.py => pars_dsp_svm_geds.py} (100%) rename scripts/{pars_dsp_tau.py => pars_dsp_tau_geds.py} (100%) diff --git a/Snakefile b/Snakefile index 10a6855..eff8f05 100644 --- a/Snakefile +++ b/Snakefile @@ -62,7 +62,9 @@ include: "rules/chanlist_gen.smk" include: "rules/common.smk" include: "rules/main.smk" include: "rules/tcm.smk" +include: "rules/dsp_pars_geds.smk" include: "rules/dsp.smk" +include: "rules/psp_pars_geds.smk" include: "rules/psp.smk" include: "rules/hit.smk" include: "rules/pht.smk" diff --git a/rules/dsp.smk b/rules/dsp.smk index 8000fa2..f4f8487 100644 --- a/rules/dsp.smk +++ b/rules/dsp.smk @@ -1,7 +1,5 @@ """ -Snakemake rules for processing dsp tier. This is done in 4 steps: -- extraction of pole zero constant(s) for each channel from cal data -- extraction of energy filter parameters and charge trapping correction for each channel from cal data +Snakemake rules for processing dsp tier. - combining of all channels into single pars files with associated plot and results files - running dsp over all channels using par file """ @@ -11,9 +9,6 @@ from scripts.library.create_pars_keylist import ParsKeyResolve from pathlib import Path from scripts.library.create_pars_keylist import ParsKeyResolve from scripts.library.patterns import ( - get_pattern_pars_tmp_channel, - get_pattern_plts_tmp_channel, - get_pattern_log_channel, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, @@ -34,250 +29,6 @@ Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) -rule build_pars_dsp_tau: - input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - decay_const=temp(get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant")), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant")), - log: - get_pattern_log_channel(setup, "par_dsp_decay_constant"), - group: - "par-dsp" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_tau.py " - "--configs {configs} " - "--log {log} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--plot_path {output.plots} " - "--output_file {output.decay_const} " - "--pulser_file {input.pulser} " - "--raw_files {input.files} " - - -rule build_pars_event_selection: - input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" - ), - pulser_file=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), - raw_cal=get_blinding_curve_file, - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - peak_file=temp(get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5")), - log: - get_pattern_log_channel(setup, "par_dsp_event_selection"), - group: - "par-dsp" - resources: - runtime=300, - mem_swap=70, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_event_selection.py " - "--configs {configs} " - 
"--log {log} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--peak_file {output.peak_file} " - "--pulser_file {input.pulser_file} " - "--decay_const {input.database} " - "--raw_cal {input.raw_cal} " - "--raw_filelist {input.files}" - - -# This rule builds the optimal energy filter parameters for the dsp using fft files -rule build_pars_dsp_nopt: - input: - files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" - ), - database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), - inplots=get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant"), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - dsp_pars_nopt=temp( - get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization") - ), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization")), - log: - get_pattern_log_channel(setup, "par_dsp_noise_optimization"), - group: - "par-dsp" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_nopt.py " - "--database {input.database} " - "--configs {configs} " - "--log {log} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--inplots {input.inplots} " - "--plot_path {output.plots} " - "--dsp_pars {output.dsp_pars_nopt} " - "--raw_filelist {input.files}" - - -# This rule builds the dplms energy filter for the dsp using fft and cal files -rule build_pars_dsp_dplms: - input: - fft_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" - ), - peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), - database=get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization"), - inplots=get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization"), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp", "dplms")), - lh5_path=temp( - get_pattern_pars_tmp_channel(setup, "dsp", "dplms", extension="lh5") - ), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "dplms")), - log: - get_pattern_log_channel(setup, "pars_dsp_dplms"), - group: - "par-dsp" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_dplms.py " - "--fft_raw_filelist {input.fft_files} " - "--peak_file {input.peak_file} " - "--database {input.database} " - "--inplots {input.inplots} " - "--configs {configs} " - "--log {log} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--dsp_pars {output.dsp_pars} " - "--lh5_path {output.lh5_path} " - "--plot_path {output.plots} " - - -# This rule builds the optimal energy filter parameters for the dsp using calibration dsp files -rule build_pars_dsp_eopt: - input: - peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), - decay_const=get_pattern_pars_tmp_channel(setup, "dsp", "dplms"), - inplots=get_pattern_plts_tmp_channel(setup, "dsp", "dplms"), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp_eopt")), - qbb_grid=temp( - get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl") - ), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp")), - log: - get_pattern_log_channel(setup, "pars_dsp_eopt"), - 
group: - "par-dsp" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_eopt.py " - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--peak_file {input.peak_file} " - "--inplots {input.inplots} " - "--decay_const {input.decay_const} " - "--plot_path {output.plots} " - "--qbb_grid_path {output.qbb_grid} " - "--final_dsp_pars {output.dsp_pars}" - - -rule build_svm_dsp: - input: - hyperpars=lambda wildcards: get_input_par_file( - wildcards, "dsp", "svm_hyperpars" - ), - train_data=lambda wildcards: str( - get_input_par_file(wildcards, "dsp", "svm_hyperpars") - ).replace("hyperpars.yaml", "train.lh5"), - params: - timestamp="{timestamp}", - datatype="cal", - output: - dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), - log: - str(get_pattern_log(setup, "pars_dsp_svm")).replace("{datatype}", "cal"), - group: - "par-dsp-svm" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_build_svm.py " - "--log {log} " - "--configs {configs} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--train_data {input.train_data} " - "--train_hyperpars {input.hyperpars} " - "--output_file {output.dsp_pars}" - - -rule build_pars_dsp_svm: - input: - dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp_eopt"), - svm_file=get_pattern_pars(setup, "dsp", "svm", "pkl"), - output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), - log: - get_pattern_log_channel(setup, "pars_dsp_svm"), - group: - "par-dsp" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_svm.py " - "--log {log} " - "--input_file {input.dsp_pars} " - "--output_file {output.dsp_pars} " - "--svm_file {input.svm_file}" - - rule build_plts_dsp: input: lambda wildcards: get_plt_chanlist( diff --git a/rules/dsp_pars_geds.smk b/rules/dsp_pars_geds.smk new file mode 100644 index 0000000..7f16c9e --- /dev/null +++ b/rules/dsp_pars_geds.smk @@ -0,0 +1,252 @@ +""" +Snakemake rules for building dsp pars for HPGes, before running build_dsp() +- extraction of pole zero constant(s) for each channel from cal data +- extraction of energy filter parameters and charge trapping correction for each channel from cal data +""" + +from scripts.util.create_pars_keylist import pars_key_resolve +from scripts.util.patterns import ( + get_pattern_pars_tmp_channel, + get_pattern_plts_tmp_channel, + get_pattern_log_channel, + get_pattern_tier_raw, + get_pattern_log, + get_pattern_pars, +) + +dsp_par_catalog = pars_key_resolve.get_par_catalog( + ["-*-*-*-cal"], + get_pattern_tier_raw(setup), + {"cal": ["par_dsp"], "lar": ["par_dsp"]}, +) + + +rule build_pars_dsp_tau_geds: + input: + files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" + ), + pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + decay_const=temp(get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant")), + plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant")), + log: + get_pattern_log_channel(setup, "par_dsp_decay_constant"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_tau_geds.py " + "--configs {configs} " + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel 
{params.channel} " + "--plot_path {output.plots} " + "--output_file {output.decay_const} " + "--pulser_file {input.pulser} " + "--raw_files {input.files}" + + +rule build_pars_evtsel_geds: + input: + files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" + ), + pulser_file=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), + raw_cal=get_blinding_curve_file, + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + peak_file=temp(get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5")), + log: + get_pattern_log_channel(setup, "par_dsp_event_selection"), + group: + "par-dsp" + resources: + runtime=300, + mem_swap=70, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_evtsel_geds.py " + "--configs {configs} " + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--peak_file {output.peak_file} " + "--pulser_file {input.pulser_file} " + "--decay_const {input.database} " + "--raw_cal {input.raw_cal} " + "--raw_filelist {input.files}" + + +# This rule builds the optimal energy filter parameters for the dsp using fft files +rule build_pars_dsp_nopt_geds: + input: + files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" + ), + database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), + inplots=get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + dsp_pars_nopt=temp( + get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization") + ), + plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization")), + log: + get_pattern_log_channel(setup, "par_dsp_noise_optimization"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_nopt_geds.py " + "--database {input.database} " + "--configs {configs} " + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--inplots {input.inplots} " + "--plot_path {output.plots} " + "--dsp_pars {output.dsp_pars_nopt} " + "--raw_filelist {input.files}" + + +# This rule builds the dplms energy filter for the dsp using fft and cal files +rule build_pars_dsp_dplms_geds: + input: + fft_files=os.path.join( + filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" + ), + peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), + database=get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization"), + inplots=get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp", "dplms")), + lh5_path=temp( + get_pattern_pars_tmp_channel(setup, "dsp", "dplms", extension="lh5") + ), + plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "dplms")), + log: + get_pattern_log_channel(setup, "pars_dsp_dplms"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_dplms_geds.py " + "--fft_raw_filelist {input.fft_files} " + "--peak_file {input.peak_file} " + "--database {input.database} " + "--inplots {input.inplots} " + "--configs {configs} " + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + 
"--channel {params.channel} " + "--dsp_pars {output.dsp_pars} " + "--lh5_path {output.lh5_path} " + "--plot_path {output.plots} " + + +# This rule builds the optimal energy filter parameters for the dsp using calibration dsp files +rule build_pars_dsp_eopt_geds: + input: + peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), + decay_const=get_pattern_pars_tmp_channel(setup, "dsp", "dplms"), + inplots=get_pattern_plts_tmp_channel(setup, "dsp", "dplms"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp_eopt")), + qbb_grid=temp( + get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl") + ), + plots=temp(get_pattern_plts_tmp_channel(setup, "dsp")), + log: + get_pattern_log_channel(setup, "pars_dsp_eopt"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_eopt_geds.py " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--peak_file {input.peak_file} " + "--inplots {input.inplots} " + "--decay_const {input.decay_const} " + "--plot_path {output.plots} " + "--qbb_grid_path {output.qbb_grid} " + "--final_dsp_pars {output.dsp_pars}" + + +rule build_svm_dsp_geds: + input: + hyperpars=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars"), + train_data=lambda wildcards: get_svm_file( + wildcards, "dsp", "svm_hyperpars" + ).replace("hyperpars.json", "train.lh5"), + output: + dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), + log: + get_pattern_log(setup, "pars_dsp_svm").replace("{datatype}", "cal"), + group: + "par-dsp-svm" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_build_svm_geds.py " + "--log {log} " + "--train_data {input.train_data} " + "--train_hyperpars {input.hyperpars} " + "--output_file {output.dsp_pars}" + + +rule build_pars_dsp_svm_geds: + input: + dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp_eopt"), + svm_file=get_pattern_pars(setup, "dsp", "svm", "pkl"), + output: + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), + log: + get_pattern_log_channel(setup, "pars_dsp_svm"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_svm_geds.py " + "--log {log} " + "--input_file {input.dsp_pars} " + "--output_file {output.dsp_pars} " + "--svm_file {input.svm_file}" diff --git a/rules/psp.smk b/rules/psp.smk index eed63ae..dc0cfe5 100644 --- a/rules/psp.smk +++ b/rules/psp.smk @@ -1,7 +1,5 @@ """ -Snakemake rules for processing pht (partition hit) tier data. This is done in 4 steps: -- extraction of calibration curves(s) for each run for each channel from cal data -- extraction of psd calibration parameters and partition level energy fitting for each channel over whole partition from cal data +Snakemake rules for processing psp (partition dsp) tier data. - combining of all channels into single pars files with associated plot and results files - running build hit over all channels using par file """ @@ -9,11 +7,7 @@ Snakemake rules for processing pht (partition hit) tier data. 
This is done in 4 from scripts.library.pars_loading import ParsCatalog from scripts.library.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.library.utils import set_last_rule_name from scripts.library.patterns import ( - get_pattern_pars_tmp_channel, - get_pattern_plts_tmp_channel, - get_pattern_log_channel, get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, @@ -33,201 +27,6 @@ if psp_par_cat_file.is_file(): Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) -psp_rules = {} -for key, dataset in part.datasets.items(): - for partition in dataset.keys(): - - rule: - input: - dsp_pars=part.get_par_files( - dsp_par_catalog, - partition, - key, - tier="dsp", - name="eopt", - ), - dsp_objs=part.get_par_files( - dsp_par_catalog, - partition, - key, - tier="dsp", - name="objects", - extension="pkl", - ), - dsp_plots=part.get_plt_files( - dsp_par_catalog, partition, key, tier="dsp" - ), - wildcard_constraints: - channel=part.get_wildcard_constraints(partition, key), - params: - datatype="cal", - channel="{channel}" if key == "default" else key, - timestamp=part.get_timestamp( - psp_par_catalog, partition, key, tier="psp" - ), - output: - psp_pars=temp( - part.get_par_files( - psp_par_catalog, - partition, - key, - tier="psp", - name="eopt", - ) - ), - psp_objs=temp( - part.get_par_files( - psp_par_catalog, - partition, - key, - tier="psp", - name="objects", - extension="pkl", - ) - ), - psp_plots=temp( - part.get_plt_files( - psp_par_catalog, - partition, - key, - tier="psp", - ) - ), - log: - part.get_log_file( - psp_par_catalog, - partition, - key, - "psp", - name="par_psp", - ), - group: - "par-psp" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/par_psp.py " - "--log {log} " - "--configs {configs} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--in_plots {input.dsp_plots} " - "--out_plots {output.psp_plots} " - "--in_obj {input.dsp_objs} " - "--out_obj {output.psp_objs} " - "--input {input.dsp_pars} " - "--output {output.psp_pars} " - - set_last_rule_name(workflow, f"{key}-{partition}-build_par_psp") - - if key in psp_rules: - psp_rules[key].append(list(workflow.rules)[-1]) - else: - psp_rules[key] = [list(workflow.rules)[-1]] - - -# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs -# This rule builds the a/e calibration using the calibration dsp files for the whole partition -rule build_par_psp: - input: - dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp", "eopt"), - dsp_objs=get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl"), - dsp_plots=get_pattern_plts_tmp_channel(setup, "dsp"), - params: - datatype="cal", - channel="{channel}", - timestamp="{timestamp}", - output: - psp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp", "eopt")), - psp_objs=temp( - get_pattern_pars_tmp_channel(setup, "psp", "objects", extension="pkl") - ), - psp_plots=temp(get_pattern_plts_tmp_channel(setup, "psp")), - log: - get_pattern_log_channel(setup, "pars_psp"), - group: - "par-psp" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/par_psp.py " - "--log {log} " - "--configs {configs} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--in_plots {input.dsp_plots} " - "--out_plots {output.psp_plots} " - "--in_obj {input.dsp_objs} " - "--out_obj 
{output.psp_objs} " - "--input {input.dsp_pars} " - "--output {output.psp_pars} " - - -fallback_psp_rule = list(workflow.rules)[-1] -rule_order_list = [] -ordered = OrderedDict(psp_rules) -ordered.move_to_end("default") -for key, items in ordered.items(): - rule_order_list += [item.name for item in items] -rule_order_list.append(fallback_psp_rule.name) -workflow._ruleorder.add(*rule_order_list) # [::-1] - - -rule build_svm_psp: - input: - hyperpars=lambda wildcards: get_input_par_file( - wildcards, "psp", "svm_hyperpars" - ), - train_data=lambda wildcards: str( - get_input_par_file(wildcards, "psp", "svm_hyperpars") - ).replace("hyperpars.yaml", "train.lh5"), - params: - timestamp="{timestamp}", - datatype="cal", - output: - dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), - log: - get_pattern_log(setup, "pars_psp_svm").as_posix().replace("{datatype}", "cal"), - group: - "par-dsp-svm" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_build_svm.py " - "--log {log} " - "--configs {configs} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--train_data {input.train_data} " - "--train_hyperpars {input.hyperpars} " - "--output_file {output.dsp_pars}" - - -rule build_pars_psp_svm: - input: - dsp_pars=get_pattern_pars_tmp_channel(setup, "psp_eopt"), - svm_model=get_pattern_pars(setup, "psp", "svm", "pkl"), - output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")), - log: - get_pattern_log_channel(setup, "pars_dsp_svm"), - group: - "par-dsp" - resources: - runtime=300, - shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_svm.py " - "--log {log} " - "--input_file {input.dsp_pars} " - "--output_file {output.dsp_pars} " - "--svm_file {input.svm_model}" - rule build_pars_psp_objects: input: diff --git a/rules/psp_pars_geds.smk b/rules/psp_pars_geds.smk new file mode 100644 index 0000000..8d3d2c8 --- /dev/null +++ b/rules/psp_pars_geds.smk @@ -0,0 +1,209 @@ +""" +Snakemake rules for processing psp (partition dsp) tier data. 
+- extraction of calibration curves(s) for each run for each channel from cal data +- extraction of psd calibration parameters and partition level energy fitting for each channel over whole partition from cal data +""" + +from scripts.util.pars_loading import pars_catalog +from scripts.util.create_pars_keylist import pars_key_resolve +from scripts.util.utils import par_psp_path, par_dsp_path, set_last_rule_name +from scripts.util.patterns import ( + get_pattern_pars_tmp_channel, + get_pattern_plts_tmp_channel, + get_pattern_log_channel, + get_pattern_log, + get_pattern_pars, +) + +psp_par_catalog = pars_key_resolve.get_par_catalog( + ["-*-*-*-cal"], + get_pattern_tier_raw(setup), + {"cal": ["par_psp"], "lar": ["par_psp"]}, +) + +psp_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + dsp_pars=part.get_par_files( + dsp_par_catalog, + partition, + key, + tier="dsp", + name="eopt", + ), + dsp_objs=part.get_par_files( + dsp_par_catalog, + partition, + key, + tier="dsp", + name="objects", + extension="pkl", + ), + dsp_plots=part.get_plt_files( + dsp_par_catalog, partition, key, tier="dsp" + ), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + psp_par_catalog, partition, key, tier="psp" + ), + output: + psp_pars=temp( + part.get_par_files( + psp_par_catalog, + partition, + key, + tier="psp", + name="eopt", + ) + ), + psp_objs=temp( + part.get_par_files( + psp_par_catalog, + partition, + key, + tier="psp", + name="objects", + extension="pkl", + ) + ), + psp_plots=temp( + part.get_plt_files( + psp_par_catalog, + partition, + key, + tier="psp", + ) + ), + log: + part.get_log_file( + psp_par_catalog, + partition, + key, + "psp", + name="par_psp", + ), + group: + "par-psp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/par_psp_geds.py " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--in_plots {input.dsp_plots} " + "--out_plots {output.psp_plots} " + "--in_obj {input.dsp_objs} " + "--out_obj {output.psp_objs} " + "--input {input.dsp_pars} " + "--output {output.psp_pars} " + + set_last_rule_name(workflow, f"{key}-{partition}-build_par_psp") + + if key in psp_rules: + psp_rules[key].append(list(workflow.rules)[-1]) + else: + psp_rules[key] = [list(workflow.rules)[-1]] + + +# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs +# This rule builds the a/e calibration using the calibration dsp files for the whole partition +rule build_par_psp: + input: + dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp", "eopt"), + dsp_objs=get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl"), + dsp_plots=get_pattern_plts_tmp_channel(setup, "dsp"), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", + output: + psp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp", "eopt")), + psp_objs=temp( + get_pattern_pars_tmp_channel(setup, "psp", "objects", extension="pkl") + ), + psp_plots=temp(get_pattern_plts_tmp_channel(setup, "psp")), + log: + get_pattern_log_channel(setup, "pars_psp"), + group: + "par-psp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/par_psp.py " + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp 
{params.timestamp} " + "--channel {params.channel} " + "--in_plots {input.dsp_plots} " + "--out_plots {output.psp_plots} " + "--in_obj {input.dsp_objs} " + "--out_obj {output.psp_objs} " + "--input {input.dsp_pars} " + "--output {output.psp_pars} " + + +fallback_psp_rule = list(workflow.rules)[-1] +rule_order_list = [] +ordered = OrderedDict(psp_rules) +ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_psp_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] + + +rule build_svm_psp: + input: + hyperpars=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars"), + train_data=lambda wildcards: get_svm_file( + wildcards, "psp", "svm_hyperpars" + ).replace("hyperpars.json", "train.lh5"), + output: + dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), + log: + get_pattern_log(setup, "pars_psp_svm").replace("{datatype}", "cal"), + group: + "par-dsp-svm" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_build_svm_geds.py " + "--log {log} " + "--train_data {input.train_data} " + "--train_hyperpars {input.hyperpars} " + "--output_file {output.dsp_pars}" + + +rule build_pars_psp_svm: + input: + dsp_pars=get_pattern_pars_tmp_channel(setup, "psp_eopt"), + svm_model=get_pattern_pars(setup, "psp", "svm", "pkl"), + output: + dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")), + log: + get_pattern_log_channel(setup, "pars_dsp_svm"), + group: + "par-dsp" + resources: + runtime=300, + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/pars_dsp_svm_geds.py " + "--log {log} " + "--input_file {input.dsp_pars} " + "--output_file {output.dsp_pars} " + "--svm_file {input.svm_model}" diff --git a/scripts/par_psp.py b/scripts/par_psp_geds.py similarity index 100% rename from scripts/par_psp.py rename to scripts/par_psp_geds.py diff --git a/scripts/pars_dsp_build_svm.py b/scripts/pars_dsp_build_svm_geds.py similarity index 100% rename from scripts/pars_dsp_build_svm.py rename to scripts/pars_dsp_build_svm_geds.py diff --git a/scripts/pars_dsp_dplms.py b/scripts/pars_dsp_dplms_geds.py similarity index 100% rename from scripts/pars_dsp_dplms.py rename to scripts/pars_dsp_dplms_geds.py diff --git a/scripts/pars_dsp_eopt.py b/scripts/pars_dsp_eopt_geds.py similarity index 100% rename from scripts/pars_dsp_eopt.py rename to scripts/pars_dsp_eopt_geds.py diff --git a/scripts/pars_dsp_event_selection.py b/scripts/pars_dsp_evtsel_geds.py similarity index 100% rename from scripts/pars_dsp_event_selection.py rename to scripts/pars_dsp_evtsel_geds.py diff --git a/scripts/pars_dsp_nopt.py b/scripts/pars_dsp_nopt_geds.py similarity index 100% rename from scripts/pars_dsp_nopt.py rename to scripts/pars_dsp_nopt_geds.py diff --git a/scripts/pars_dsp_svm.py b/scripts/pars_dsp_svm_geds.py similarity index 100% rename from scripts/pars_dsp_svm.py rename to scripts/pars_dsp_svm_geds.py diff --git a/scripts/pars_dsp_tau.py b/scripts/pars_dsp_tau_geds.py similarity index 100% rename from scripts/pars_dsp_tau.py rename to scripts/pars_dsp_tau_geds.py From c01cf27687bca8dc252c1c81efb7cc293c603f56 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 28 Jan 2025 18:41:55 +0100 Subject: [PATCH 053/101] update with prodenv stuff and uv --- .gitignore | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.gitignore b/.gitignore index 4eb2181..a904f40 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,13 @@ *~ *.csv +# uv +uv.lock + +#prodenv_stuff +inputs 
+software +generated # -------------------- github-generated stuff ------------------- # Byte-compiled / optimized / DLL files From 0c4b270de18d1c478134e54574ffee4737596de5 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 28 Jan 2025 18:42:48 +0100 Subject: [PATCH 054/101] add execenv scripts --- pyproject.toml | 37 +++++-- workflow/scripts/library/__init__.py | 4 - workflow/scripts/library/execenv.py | 148 +++++++++++++++++++++++++++ 3 files changed, 177 insertions(+), 12 deletions(-) create mode 100644 workflow/scripts/library/execenv.py diff --git a/pyproject.toml b/pyproject.toml index d96ee37..62ebab3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,14 +1,31 @@ -[tool.uv] -package = false - [tool.uv.workspace] -exclude = ["rules", "templates", "scripts", "generated", "inputs", "software", "workflow"] +exclude = ["generated", "inputs", "software", "workflow"] + +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" [tool.setuptools] +include-package-data = true +zip-safe = false +license-files = [ + "LICENSE", +] py-modules = [] +[tool.setuptools.package-dir] +"" = "workflow" + +[tool.setuptools.packages.find] +where = [ + "workflow", +] + +[tool.setuptools_scm] +write_to = "workflow/_version.py" + [project] -name = "legend-dataflow" +name = "legend_dataflow" description = "Python package for processing L200 data" authors = [ {name = "George Marshall", email = "george.marshall.20@ucl.ac.uk"}, @@ -33,6 +50,7 @@ requires-python = ">=3.11" dependencies = [ "dbetto>=1.0.5", "snakemake>=8.16", + #"pylegendmeta>=1.0.0", wait for new release ] dynamic = [ "version", @@ -47,17 +65,17 @@ no_container = [ "legend-daq2lh5", ] test = [ - "legend-dataflow[no_container]", + "legend_dataflow[no_container]", "pytest >=6", "pytest-cov >=3", ] dev = [ - "legend-dataflow[no_container]", + "legend_dataflow[no_container]", "pytest >=6", "pytest-cov >=3", ] docs = [ - "legend-dataflow[no_container]", + "legend_dataflow[no_container]", "sphinx>=7.0", "myst_parser>=0.13", "sphinx_inline_tabs", @@ -65,3 +83,6 @@ docs = [ "sphinx_autodoc_typehints", "furo>=2023.08.17", ] + +[project.scripts] +dataprod = "scripts.library.execenv:dataprod" diff --git a/workflow/scripts/library/__init__.py b/workflow/scripts/library/__init__.py index 5aee6d5..f812d11 100644 --- a/workflow/scripts/library/__init__.py +++ b/workflow/scripts/library/__init__.py @@ -1,5 +1,4 @@ from .cal_grouping import CalGrouping -from .catalog import Catalog, Props, PropsStream from .create_pars_keylist import ParsKeyResolve from .FileKey import ChannelProcKey, FileKey, ProcessingFileKey from .pars_loading import ParsCatalog @@ -13,14 +12,11 @@ __all__ = [ "CalGrouping", - "Catalog", "ChannelProcKey", "FileKey", "ParsCatalog", "ParsKeyResolve", "ProcessingFileKey", - "Props", - "PropsStream", "runcmd", "subst_vars", "subst_vars", diff --git a/workflow/scripts/library/execenv.py b/workflow/scripts/library/execenv.py new file mode 100644 index 0000000..c4e249d --- /dev/null +++ b/workflow/scripts/library/execenv.py @@ -0,0 +1,148 @@ +# ruff: noqa: T201 +from __future__ import annotations + +import argparse +import os +import shutil +import string +import subprocess +from pathlib import Path + +import yaml + + +def dataprod() -> None: + """dataprod's command-line interface for installing and loading the software in the data production environment. + + .. 
code-block:: console + + $ dataprod --help + $ dataprod load --help # help section for a specific sub-command + """ + + parser = argparse.ArgumentParser( + prog="dataprod", description="dataprod's command-line interface" + ) + + subparsers = parser.add_subparsers() + parser_install = subparsers.add_parser( + "install", help="install user software in data production environment" + ) + parser_install.add_argument( + "config_file", help="production cycle configuration file", type=str + ) + parser_install.add_argument( + "-r", help="remove software directory before installing software", action="store_true" + ) + parser_install.set_defaults(func=install) + + parser_load = subparsers.add_parser( + "load", help="load data production environment and execute a given command" + ) + parser_load.add_argument("config_file", help="production cycle configuration file", type=str) + parser_load.add_argument( + "command", help="command to run within the container", type=str, nargs="+" + ) + parser_load.set_defaults(func=load) + + args = parser.parse_args() + args.func(args) + + +def install(args) -> None: + """ + This function installs user software in the data production environment. + The software packages should be specified in the config.yaml file with the format: + + ```yaml + setups: + l200: + pkg_versions: + package_name: package_version + ``` + """ + print(args.config_file) + if not Path(args.config_file).is_file(): + msg = "config file is not a regular file" + raise RuntimeError(msg) + + config_file_dir = Path(args.config_file).resolve().parent + with Path(args.config_file).open() as r: + config_dic = yaml.safe_load(r) + + exec_cmd = config_dic["setups"]["l200"]["execenv"]["cmd"] + exec_arg = config_dic["setups"]["l200"]["execenv"]["arg"] + path_src = config_dic["setups"]["l200"]["paths"]["src"] + path_install = config_dic["setups"]["l200"]["paths"]["install"] + path_cache = config_dic["setups"]["l200"]["paths"]["cache"] + + exec_cmd = string.Template(exec_cmd).substitute({"_": config_file_dir}) + exec_arg = string.Template(exec_arg).substitute({"_": config_file_dir}) + path_src = Path(string.Template(path_src).substitute({"_": config_file_dir})) + path_install = Path(string.Template(path_install).substitute({"_": config_file_dir})) + path_cache = Path(string.Template(path_cache).substitute({"_": config_file_dir})) + + if args.r: + shutil.rmtree(path_install) + shutil.rmtree(path_cache) + + pkg_list = "" + for pkg, pkg_version in config_dic["setups"]["l200"]["pkg_versions"].items(): + if (path_src / pkg).exists(): + pkg_list += f" '{path_src / pkg}'" + else: + pkg_list += f" '{pkg_version}'" + + cmd_expr = ( + f"PYTHONUSERBASE={path_install} PIP_CACHE_DIR={path_cache} " + f"{exec_cmd} {exec_arg} python3 -B -m pip install --no-warn-script-location {pkg_list}" + ) + print("INFO: running:", cmd_expr) + os.system(cmd_expr) + + +def load(args) -> None: + """ + This function loads the data production environment and executes a given command. 
+ """ + + if not Path(args.config_file).is_file(): + print("Error: config file does not exist") + exit() + + config_file_dir = Path(args.config_file).resolve().parent + with Path(args.config_file).open() as r: + config_dic = yaml.safe_load(r) + + exec_cmd = config_dic["setups"]["l200"]["execenv"]["cmd"] + exec_arg = config_dic["setups"]["l200"]["execenv"]["arg"] + env_vars = config_dic["setups"]["l200"]["execenv"]["env"] + path_install = config_dic["setups"]["l200"]["paths"]["install"] + + exec_cmd = string.Template(exec_cmd).substitute({"_": config_file_dir}) + exec_arg = string.Template(exec_arg).substitute({"_": config_file_dir}) + path_install = string.Template(path_install).substitute({"_": config_file_dir}) + + xdg_runtime_dir = os.getenv("XDG_RUNTIME_DIR") + if xdg_runtime_dir: + subprocess.run( + [*(exec_cmd.split()), exec_arg, *args.command], + env=dict( + PYTHONUSERBASE=path_install, + APPTAINERENV_APPEND_PATH=f":{path_install}/bin", + APPTAINER_BINDPATH=xdg_runtime_dir, + **env_vars, + ), + check=True, + ) + else: + subprocess.run( + [*(exec_cmd.split()), exec_arg, *args.command], + env=dict( + PYTHONUSERBASE=path_install, + APPTAINERENV_APPEND_PATH=f":{path_install}/bin", + APPTAINER_BINDPATH=xdg_runtime_dir, + **env_vars, + ), + check=True, + ) From 88cafa5de757e7af458fe48f6adc410395b77df5 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 28 Jan 2025 18:44:17 +0100 Subject: [PATCH 055/101] add code for metadata checkout subject to release --- workflow/Snakefile | 8 +++++++- workflow/Snakefile-build-raw | 7 ++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index dbc02b5..ba839be 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -46,9 +46,15 @@ chan_maps = chan_map_path(setup) meta = metadata_path(setup) det_status = det_status_path(setup) swenv = runcmd(setup) -part = lib.CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") basedir = workflow.basedir +# wait for new pylegendmeta release +# if not Path(meta).exists(): +# meta = LegendMetadata() +# meta.checkout(config["setups"]["l200"]["meta_version"]) + +part = lib.CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") + wildcard_constraints: experiment=r"\w+", diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw index 5dddfa6..0dec789 100644 --- a/workflow/Snakefile-build-raw +++ b/workflow/Snakefile-build-raw @@ -34,9 +34,14 @@ chan_maps = chan_map_path(setup) swenv = runcmd(setup) meta = metadata_path(setup) det_status = det_status_path(setup) - basedir = workflow.basedir +# wait for new pylegendmeta release +# if not Path(meta).exists(): +# meta = LegendMetadata() +# meta.checkout(config["setups"]["l200"]["meta_version"]) +s + wildcard_constraints: experiment=r"\w+", From 93ad1b3668d48ee11e65e69b3bbab2c5de6739dd Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 29 Jan 2025 11:22:23 +0100 Subject: [PATCH 056/101] update pre-commit config --- .pre-commit-config.yaml | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9217a46..96cec14 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -36,10 +36,18 @@ repos: - id: ruff args: ["--fix", "--show-fixes"] -- repo: https://github.com/asottile/setup-cfg-fmt - rev: "v2.7.0" +- repo: https://github.com/abravalheri/validate-pyproject + rev: "v0.23" hooks: - - id: setup-cfg-fmt + - id: validate-pyproject + additional_dependencies: ["validate-pyproject-schema-store[all]"] + 
+- repo: https://github.com/python-jsonschema/check-jsonschema + rev: "0.30.0" + hooks: + - id: check-dependabot + - id: check-github-workflows + - id: check-readthedocs - repo: https://github.com/pre-commit/mirrors-mypy rev: "v1.14.1" From 7cf0a1b90569acdbbf8704fd15a5a8e80f663785 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 29 Jan 2025 11:22:50 +0100 Subject: [PATCH 057/101] Move library to own package and install that --- pyproject.toml | 31 +++++++++---------- workflow/scripts/build_dsp.py | 2 +- workflow/scripts/build_evt.py | 2 +- workflow/scripts/build_hit.py | 2 +- workflow/scripts/build_raw_blind.py | 2 +- workflow/scripts/build_raw_fcio.py | 2 +- workflow/scripts/build_raw_orca.py | 2 +- workflow/scripts/build_skm.py | 2 +- workflow/scripts/build_tcm.py | 2 +- workflow/scripts/check_blinding.py | 2 +- workflow/scripts/complete_run.py | 6 ++-- workflow/scripts/merge_channels.py | 2 +- workflow/scripts/par_psp_geds.py | 2 +- workflow/scripts/pars_dsp_build_svm_geds.py | 2 +- workflow/scripts/pars_dsp_dplms_geds.py | 2 +- workflow/scripts/pars_dsp_eopt_geds.py | 2 +- workflow/scripts/pars_dsp_evtsel_geds.py | 2 +- workflow/scripts/pars_dsp_nopt_geds.py | 2 +- workflow/scripts/pars_dsp_tau_geds.py | 2 +- workflow/scripts/pars_hit_aoe.py | 4 +-- workflow/scripts/pars_hit_ecal.py | 4 +-- workflow/scripts/pars_hit_lq.py | 4 +-- workflow/scripts/pars_hit_qc.py | 4 +-- workflow/scripts/pars_pht_aoecal.py | 4 +-- workflow/scripts/pars_pht_fast.py | 4 +-- workflow/scripts/pars_pht_lqcal.py | 4 +-- workflow/scripts/pars_pht_partcal.py | 4 +-- workflow/scripts/pars_pht_qc.py | 4 +-- workflow/scripts/pars_pht_qc_phy.py | 4 +-- workflow/scripts/pars_tcm_pulser.py | 2 +- .../library => src/legenddataflow}/FileKey.py | 0 .../legenddataflow}/__init__.py | 0 .../legenddataflow}/cal_grouping.py | 0 .../legenddataflow}/convert_np.py | 0 .../legenddataflow}/create_pars_keylist.py | 0 .../library => src/legenddataflow}/execenv.py | 0 .../library => src/legenddataflow}/log.py | 0 .../legenddataflow}/pars_loading.py | 0 .../legenddataflow}/patterns.py | 0 .../library => src/legenddataflow}/utils.py | 0 40 files changed, 55 insertions(+), 58 deletions(-) rename workflow/{scripts/library => src/legenddataflow}/FileKey.py (100%) rename workflow/{scripts/library => src/legenddataflow}/__init__.py (100%) rename workflow/{scripts/library => src/legenddataflow}/cal_grouping.py (100%) rename workflow/{scripts/library => src/legenddataflow}/convert_np.py (100%) rename workflow/{scripts/library => src/legenddataflow}/create_pars_keylist.py (100%) rename workflow/{scripts/library => src/legenddataflow}/execenv.py (100%) rename workflow/{scripts/library => src/legenddataflow}/log.py (100%) rename workflow/{scripts/library => src/legenddataflow}/pars_loading.py (100%) rename workflow/{scripts/library => src/legenddataflow}/patterns.py (100%) rename workflow/{scripts/library => src/legenddataflow}/utils.py (100%) diff --git a/pyproject.toml b/pyproject.toml index 62ebab3..cf0bc78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,34 +9,31 @@ build-backend = "setuptools.build_meta" include-package-data = true zip-safe = false license-files = [ - "LICENSE", + "LICENSE.md", ] py-modules = [] [tool.setuptools.package-dir] -"" = "workflow" - -[tool.setuptools.packages.find] -where = [ - "workflow", -] +"" = "workflow/src" [tool.setuptools_scm] -write_to = "workflow/_version.py" +write_to = "workflow/src/legenddataflow_version.py" [project] name = "legend_dataflow" -description = "Python package for 
processing L200 data" +description = "Python package for processing LEGEND-200 data" authors = [ {name = "George Marshall", email = "george.marshall.20@ucl.ac.uk"}, {name = "Luigi Pertoldi", email = "gipert@pm.me"}, - {name = "The Legend Collaboration"}, +] +maintainers = [ + {name = "The LEGEND Collaboration"}, ] classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Intended Audience :: Science/Research", - "License :: OSI Approved :: MIT Expat License", + "License :: OSI Approved :: MIT License", "Operating System :: MacOS", "Operating System :: POSIX", "Operating System :: Unix", @@ -47,20 +44,19 @@ classifiers = [ ] readme = "README.md" requires-python = ">=3.11" +dynamic = ["version"] + dependencies = [ "dbetto>=1.0.5", "snakemake>=8.16", - #"pylegendmeta>=1.0.0", wait for new release -] -dynamic = [ - "version", + "pylegendmeta==1.2.0a2", ] [project.optional-dependencies] no_container = [ "pygama", "dspeed", - "pylegendmeta", + "pylegendmeta==1.2.0a2", "legend-pydataobj", "legend-daq2lh5", ] @@ -73,6 +69,7 @@ dev = [ "legend_dataflow[no_container]", "pytest >=6", "pytest-cov >=3", + "pre-commit", ] docs = [ "legend_dataflow[no_container]", @@ -85,4 +82,4 @@ docs = [ ] [project.scripts] -dataprod = "scripts.library.execenv:dataprod" +dataprod = "legenddataflow.execenv:dataprod" diff --git a/workflow/scripts/build_dsp.py b/workflow/scripts/build_dsp.py index 6f97406..f6e44df 100644 --- a/workflow/scripts/build_dsp.py +++ b/workflow/scripts/build_dsp.py @@ -8,9 +8,9 @@ from dbetto import TextDB from dbetto.catalog import Props from dspeed import build_dsp +from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo import lh5 -from library.log import build_log def replace_list_with_array(dic): diff --git a/workflow/scripts/build_evt.py b/workflow/scripts/build_evt.py index 6ef1e0f..5eac164 100644 --- a/workflow/scripts/build_evt.py +++ b/workflow/scripts/build_evt.py @@ -6,9 +6,9 @@ import lgdo.lh5 as lh5 import numpy as np from dbetto import Props, TextDB +from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo.types import Array -from library.log import build_log from pygama.evt import build_evt sto = lh5.LH5Store() diff --git a/workflow/scripts/build_hit.py b/workflow/scripts/build_hit.py index 6310521..f096e0c 100644 --- a/workflow/scripts/build_hit.py +++ b/workflow/scripts/build_hit.py @@ -3,9 +3,9 @@ from pathlib import Path from dbetto.catalog import Props +from legenddataflow.log import build_log from legendmeta import LegendMetadata, TextDB from lgdo import lh5 -from library.log import build_log from pygama.hit.build_hit import build_hit argparser = argparse.ArgumentParser() diff --git a/workflow/scripts/build_raw_blind.py b/workflow/scripts/build_raw_blind.py index e343bde..5d582d4 100644 --- a/workflow/scripts/build_raw_blind.py +++ b/workflow/scripts/build_raw_blind.py @@ -16,9 +16,9 @@ import numexpr as ne import numpy as np from dbetto.catalog import Props +from legenddataflow.log import build_log from legendmeta import LegendMetadata, TextDB from lgdo import lh5 -from library.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) diff --git a/workflow/scripts/build_raw_fcio.py b/workflow/scripts/build_raw_fcio.py index ddc765c..c3b577e 100644 --- a/workflow/scripts/build_raw_fcio.py +++ b/workflow/scripts/build_raw_fcio.py @@ -6,7 +6,7 @@ from daq2lh5 import build_raw from dbetto import TextDB from dbetto.catalog 
import Props -from library.log import build_log +from legenddataflow.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/workflow/scripts/build_raw_orca.py b/workflow/scripts/build_raw_orca.py index 0f5bbcb..c098806 100644 --- a/workflow/scripts/build_raw_orca.py +++ b/workflow/scripts/build_raw_orca.py @@ -6,7 +6,7 @@ from daq2lh5 import build_raw from dbetto import TextDB from dbetto.catalog import Props -from library.log import build_log +from legenddataflow.log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/workflow/scripts/build_skm.py b/workflow/scripts/build_skm.py index aefc31b..be2cfb3 100644 --- a/workflow/scripts/build_skm.py +++ b/workflow/scripts/build_skm.py @@ -3,9 +3,9 @@ import awkward as ak from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.log import build_log from lgdo import lh5 from lgdo.types import Array, Struct, Table, VectorOfVectors -from library.log import build_log def get_all_out_fields(input_table, out_fields, current_field=""): diff --git a/workflow/scripts/build_tcm.py b/workflow/scripts/build_tcm.py index 2718c00..402c567 100644 --- a/workflow/scripts/build_tcm.py +++ b/workflow/scripts/build_tcm.py @@ -6,7 +6,7 @@ from daq2lh5.orca import orca_flashcam from dbetto import TextDB from dbetto.catalog import Props -from library.log import build_log +from legenddataflow.log import build_log from pygama.evt.build_tcm import build_tcm argparser = argparse.ArgumentParser() diff --git a/workflow/scripts/check_blinding.py b/workflow/scripts/check_blinding.py index a81a1a3..2a47172 100644 --- a/workflow/scripts/check_blinding.py +++ b/workflow/scripts/check_blinding.py @@ -16,9 +16,9 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo import lh5 -from library.log import build_log from pygama.math.histogram import get_hist from pygama.pargen.energy_cal import get_i_local_maxima diff --git a/workflow/scripts/complete_run.py b/workflow/scripts/complete_run.py index 7ffd73a..4d5cad7 100644 --- a/workflow/scripts/complete_run.py +++ b/workflow/scripts/complete_run.py @@ -7,9 +7,9 @@ import time from pathlib import Path -import library.utils as ut -from library import patterns -from library.FileKey import FileKey +import legenddataflow.utils as ut +from legenddataflow import patterns +from legenddataflow.FileKey import FileKey print("INFO: dataflow ran successfully, now few final checks and scripts") diff --git a/workflow/scripts/merge_channels.py b/workflow/scripts/merge_channels.py index 209708d..1ca2026 100644 --- a/workflow/scripts/merge_channels.py +++ b/workflow/scripts/merge_channels.py @@ -5,9 +5,9 @@ import numpy as np from dbetto.catalog import Props +from legenddataflow.FileKey import ChannelProcKey from legendmeta import LegendMetadata from lgdo import lh5 -from library.FileKey import ChannelProcKey def replace_path(d, old_path, new_path): diff --git a/workflow/scripts/par_psp_geds.py b/workflow/scripts/par_psp_geds.py index d996f3c..c74ffa3 100644 --- a/workflow/scripts/par_psp_geds.py +++ b/workflow/scripts/par_psp_geds.py @@ -8,8 +8,8 @@ import matplotlib.pyplot as plt import numpy as np from dbetto.catalog import Props +from legenddataflow.FileKey import ChannelProcKey from legendmeta import LegendMetadata -from library.FileKey import ChannelProcKey 
mpl.use("Agg") diff --git a/workflow/scripts/pars_dsp_build_svm_geds.py b/workflow/scripts/pars_dsp_build_svm_geds.py index 7a0ecc9..3b7b7ea 100644 --- a/workflow/scripts/pars_dsp_build_svm_geds.py +++ b/workflow/scripts/pars_dsp_build_svm_geds.py @@ -4,8 +4,8 @@ from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.log import build_log from lgdo import lh5 -from library.log import build_log from sklearn.svm import SVC argparser = argparse.ArgumentParser() diff --git a/workflow/scripts/pars_dsp_dplms_geds.py b/workflow/scripts/pars_dsp_dplms_geds.py index 457bda1..5d33fb8 100644 --- a/workflow/scripts/pars_dsp_dplms_geds.py +++ b/workflow/scripts/pars_dsp_dplms_geds.py @@ -8,9 +8,9 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo import Array, Table -from library.log import build_log from pygama.pargen.dplms_ge_dict import dplms_ge_dict argparser = argparse.ArgumentParser() diff --git a/workflow/scripts/pars_dsp_eopt_geds.py b/workflow/scripts/pars_dsp_eopt_geds.py index a957c66..e59ee54 100644 --- a/workflow/scripts/pars_dsp_eopt_geds.py +++ b/workflow/scripts/pars_dsp_eopt_geds.py @@ -11,8 +11,8 @@ from dbetto import TextDB from dbetto.catalog import Props from dspeed.units import unit_registry as ureg +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.log import build_log from pygama.math.distributions import hpge_peak from pygama.pargen.dsp_optimize import ( BayesianOptimizer, diff --git a/workflow/scripts/pars_dsp_evtsel_geds.py b/workflow/scripts/pars_dsp_evtsel_geds.py index 177eba6..dc76878 100644 --- a/workflow/scripts/pars_dsp_evtsel_geds.py +++ b/workflow/scripts/pars_dsp_evtsel_geds.py @@ -12,8 +12,8 @@ import pygama.pargen.energy_cal as pgc from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.log import build_log from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp diff --git a/workflow/scripts/pars_dsp_nopt_geds.py b/workflow/scripts/pars_dsp_nopt_geds.py index 53188ba..ae3aacb 100644 --- a/workflow/scripts/pars_dsp_nopt_geds.py +++ b/workflow/scripts/pars_dsp_nopt_geds.py @@ -8,8 +8,8 @@ import pygama.pargen.noise_optimization as pno from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.log import build_log from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp diff --git a/workflow/scripts/pars_dsp_tau_geds.py b/workflow/scripts/pars_dsp_tau_geds.py index 9a38526..1149c69 100644 --- a/workflow/scripts/pars_dsp_tau_geds.py +++ b/workflow/scripts/pars_dsp_tau_geds.py @@ -6,8 +6,8 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.log import build_log from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.extract_tau import ExtractTau diff --git a/workflow/scripts/pars_hit_aoe.py b/workflow/scripts/pars_hit_aoe.py index 575d3de..d7fa221 100644 --- a/workflow/scripts/pars_hit_aoe.py +++ b/workflow/scripts/pars_hit_aoe.py @@ -10,9 +10,9 @@ 
import pandas as pd from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.convert_np import convert_dict_np_to_float +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.convert_np import convert_dict_np_to_float -from library.log import build_log from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import get_tcm_pulser_ids diff --git a/workflow/scripts/pars_hit_ecal.py b/workflow/scripts/pars_hit_ecal.py index 488463c..8bf4f1f 100644 --- a/workflow/scripts/pars_hit_ecal.py +++ b/workflow/scripts/pars_hit_ecal.py @@ -16,9 +16,9 @@ import pygama.math.histogram as pgh from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.convert_np import convert_dict_np_to_float +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.convert_np import convert_dict_np_to_float -from library.log import build_log from matplotlib.colors import LogNorm from pygama.math.distributions import nb_poly from pygama.pargen.data_cleaning import get_mode_stdev, get_tcm_pulser_ids diff --git a/workflow/scripts/pars_hit_lq.py b/workflow/scripts/pars_hit_lq.py index 4a75a06..c5f04cb 100644 --- a/workflow/scripts/pars_hit_lq.py +++ b/workflow/scripts/pars_hit_lq.py @@ -9,9 +9,9 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.convert_np import convert_dict_np_to_float +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.convert_np import convert_dict_np_to_float -from library.log import build_log from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids diff --git a/workflow/scripts/pars_hit_qc.py b/workflow/scripts/pars_hit_qc.py index 460e858..c9d380f 100644 --- a/workflow/scripts/pars_hit_qc.py +++ b/workflow/scripts/pars_hit_qc.py @@ -11,10 +11,10 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.convert_np import convert_dict_np_to_float +from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo.lh5 import ls -from library.convert_np import convert_dict_np_to_float -from library.log import build_log from pygama.pargen.data_cleaning import ( generate_cut_classifiers, get_keys, diff --git a/workflow/scripts/pars_pht_aoecal.py b/workflow/scripts/pars_pht_aoecal.py index f46fb7b..bbcf791 100644 --- a/workflow/scripts/pars_pht_aoecal.py +++ b/workflow/scripts/pars_pht_aoecal.py @@ -13,9 +13,9 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.FileKey import ChannelProcKey, ProcessingFileKey +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.FileKey import ChannelProcKey, ProcessingFileKey -from library.log import build_log from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import get_tcm_pulser_ids diff --git a/workflow/scripts/pars_pht_fast.py b/workflow/scripts/pars_pht_fast.py index cf90b94..1dfd1d6 100644 --- a/workflow/scripts/pars_pht_fast.py +++ b/workflow/scripts/pars_pht_fast.py @@ -10,9 +10,9 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.FileKey import ChannelProcKey, 
ProcessingFileKey +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.FileKey import ChannelProcKey, ProcessingFileKey -from library.log import build_log from pars_pht_aoecal import run_aoe_calibration from pars_pht_lqcal import run_lq_calibration from pars_pht_partcal import calibrate_partition diff --git a/workflow/scripts/pars_pht_lqcal.py b/workflow/scripts/pars_pht_lqcal.py index d470480..8826efd 100644 --- a/workflow/scripts/pars_pht_lqcal.py +++ b/workflow/scripts/pars_pht_lqcal.py @@ -11,9 +11,9 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.FileKey import ChannelProcKey, ProcessingFileKey +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.FileKey import ChannelProcKey, ProcessingFileKey -from library.log import build_log from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.data_cleaning import get_tcm_pulser_ids diff --git a/workflow/scripts/pars_pht_partcal.py b/workflow/scripts/pars_pht_partcal.py index b726b96..b3e43c4 100644 --- a/workflow/scripts/pars_pht_partcal.py +++ b/workflow/scripts/pars_pht_partcal.py @@ -13,9 +13,9 @@ import pygama.math.histogram as pgh from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.FileKey import ChannelProcKey, ProcessingFileKey +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.FileKey import ChannelProcKey, ProcessingFileKey -from library.log import build_log from pygama.math.distributions import nb_poly from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration diff --git a/workflow/scripts/pars_pht_qc.py b/workflow/scripts/pars_pht_qc.py index e3fbd12..2ad477a 100644 --- a/workflow/scripts/pars_pht_qc.py +++ b/workflow/scripts/pars_pht_qc.py @@ -11,10 +11,10 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.convert_np import convert_dict_np_to_float +from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo.lh5 import ls -from library.convert_np import convert_dict_np_to_float -from library.log import build_log from pygama.pargen.data_cleaning import ( generate_cut_classifiers, get_keys, diff --git a/workflow/scripts/pars_pht_qc_phy.py b/workflow/scripts/pars_pht_qc_phy.py index c235064..791fa2b 100644 --- a/workflow/scripts/pars_pht_qc_phy.py +++ b/workflow/scripts/pars_pht_qc_phy.py @@ -12,10 +12,10 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.convert_np import convert_dict_np_to_float +from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo.lh5 import ls -from library.convert_np import convert_dict_np_to_float -from library.log import build_log from pygama.pargen.data_cleaning import ( generate_cut_classifiers, get_keys, diff --git a/workflow/scripts/pars_tcm_pulser.py b/workflow/scripts/pars_tcm_pulser.py index b7618d1..56700ec 100644 --- a/workflow/scripts/pars_tcm_pulser.py +++ b/workflow/scripts/pars_tcm_pulser.py @@ -6,8 +6,8 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props +from legenddataflow.log import build_log from legendmeta import LegendMetadata -from library.log import build_log from pygama.pargen.data_cleaning import get_tcm_pulser_ids argparser = argparse.ArgumentParser() 
diff --git a/workflow/scripts/library/FileKey.py b/workflow/src/legenddataflow/FileKey.py similarity index 100% rename from workflow/scripts/library/FileKey.py rename to workflow/src/legenddataflow/FileKey.py diff --git a/workflow/scripts/library/__init__.py b/workflow/src/legenddataflow/__init__.py similarity index 100% rename from workflow/scripts/library/__init__.py rename to workflow/src/legenddataflow/__init__.py diff --git a/workflow/scripts/library/cal_grouping.py b/workflow/src/legenddataflow/cal_grouping.py similarity index 100% rename from workflow/scripts/library/cal_grouping.py rename to workflow/src/legenddataflow/cal_grouping.py diff --git a/workflow/scripts/library/convert_np.py b/workflow/src/legenddataflow/convert_np.py similarity index 100% rename from workflow/scripts/library/convert_np.py rename to workflow/src/legenddataflow/convert_np.py diff --git a/workflow/scripts/library/create_pars_keylist.py b/workflow/src/legenddataflow/create_pars_keylist.py similarity index 100% rename from workflow/scripts/library/create_pars_keylist.py rename to workflow/src/legenddataflow/create_pars_keylist.py diff --git a/workflow/scripts/library/execenv.py b/workflow/src/legenddataflow/execenv.py similarity index 100% rename from workflow/scripts/library/execenv.py rename to workflow/src/legenddataflow/execenv.py diff --git a/workflow/scripts/library/log.py b/workflow/src/legenddataflow/log.py similarity index 100% rename from workflow/scripts/library/log.py rename to workflow/src/legenddataflow/log.py diff --git a/workflow/scripts/library/pars_loading.py b/workflow/src/legenddataflow/pars_loading.py similarity index 100% rename from workflow/scripts/library/pars_loading.py rename to workflow/src/legenddataflow/pars_loading.py diff --git a/workflow/scripts/library/patterns.py b/workflow/src/legenddataflow/patterns.py similarity index 100% rename from workflow/scripts/library/patterns.py rename to workflow/src/legenddataflow/patterns.py diff --git a/workflow/scripts/library/utils.py b/workflow/src/legenddataflow/utils.py similarity index 100% rename from workflow/scripts/library/utils.py rename to workflow/src/legenddataflow/utils.py From 3eca65dd54cd1c39aa4f0955c11e8a3d7d2598dd Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 29 Jan 2025 11:33:55 +0100 Subject: [PATCH 058/101] fix docs --- docs/Makefile | 5 +++-- pyproject.toml | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index ff41907..b85f221 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -17,8 +17,9 @@ apidoc: clean-apidoc --module-first \ --force \ --output-dir "$(SOURCEDIR)/api" \ - ../scripts \ - ../rules + ../workflow/src/legenddataflow \ + ../workflow/scripts \ + ../workflow/rules clean-apidoc: rm -rf "$(SOURCEDIR)/api" diff --git a/pyproject.toml b/pyproject.toml index cf0bc78..ee2f40c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,8 +16,11 @@ py-modules = [] [tool.setuptools.package-dir] "" = "workflow/src" +[tool.setuptools.packages.find] +where = ["workflow/src"] + [tool.setuptools_scm] -write_to = "workflow/src/legenddataflow_version.py" +write_to = "workflow/src/legenddataflow/_version.py" [project] name = "legend_dataflow" From e4df0d314497e29bc4f9fd4e9f0cc3f2c1f0a4bf Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 29 Jan 2025 11:54:18 +0100 Subject: [PATCH 059/101] pre-commit updates, minor format change in config.pkg_versions --- .pre-commit-config.yaml | 168 +++++++++++++------------ LICENSE.md | 9 +- config-lngs.yaml | 48 ++++--- 
config-nersc.yaml | 22 ++-- workflow/src/legenddataflow/execenv.py | 59 ++++----- 5 files changed, 160 insertions(+), 146 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 96cec14..1b3a8b9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,97 +4,101 @@ ci: autofix_commit_msg: "style: pre-commit fixes" repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: "v5.0.0" - hooks: - - id: check-added-large-files - - id: check-case-conflict - - id: check-merge-conflict - - id: check-symlinks - - id: check-yaml - - id: check-json - - id: check-toml - - id: check-docstring-first - - id: debug-statements - - id: end-of-file-fixer - - id: forbid-new-submodules - - id: mixed-line-ending - - id: name-tests-test - args: ["--pytest-test-first"] - - id: requirements-txt-fixer - - id: trailing-whitespace + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: "v5.0.0" + hooks: + - id: check-added-large-files + - id: check-case-conflict + - id: check-merge-conflict + - id: check-symlinks + - id: check-yaml + - id: check-json + - id: check-toml + - id: check-docstring-first + - id: debug-statements + - id: end-of-file-fixer + - id: forbid-new-submodules + - id: mixed-line-ending + - id: name-tests-test + args: ["--pytest-test-first"] + - id: requirements-txt-fixer + - id: trailing-whitespace -- repo: https://github.com/psf/black - rev: "24.10.0" - hooks: - - id: black-jupyter - args: ["--line-length", "99"] + - repo: https://github.com/psf/black + rev: "24.10.0" + hooks: + - id: black-jupyter + args: ["--line-length", "99"] -- repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.8.6" - hooks: - - id: ruff - args: ["--fix", "--show-fixes"] + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: "v0.8.6" + hooks: + - id: ruff + args: ["--fix", "--show-fixes"] -- repo: https://github.com/abravalheri/validate-pyproject - rev: "v0.23" - hooks: - - id: validate-pyproject - additional_dependencies: ["validate-pyproject-schema-store[all]"] + - repo: https://github.com/abravalheri/validate-pyproject + rev: "v0.23" + hooks: + - id: validate-pyproject + additional_dependencies: ["validate-pyproject-schema-store[all]"] -- repo: https://github.com/python-jsonschema/check-jsonschema - rev: "0.30.0" - hooks: - - id: check-dependabot - - id: check-github-workflows - - id: check-readthedocs + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: "0.30.0" + hooks: + - id: check-dependabot + - id: check-github-workflows + - id: check-readthedocs -- repo: https://github.com/pre-commit/mirrors-mypy - rev: "v1.14.1" - hooks: - - id: mypy - files: src - stages: [manual] + - repo: https://github.com/pre-commit/mirrors-mypy + rev: "v1.14.1" + hooks: + - id: mypy + files: src + stages: [manual] -- repo: https://github.com/kynan/nbstripout - rev: "0.8.1" - hooks: - - id: nbstripout - args: ["--drop-empty-cells", - "--extra-keys", "metadata.kernelspec metadata.language_info"] + - repo: https://github.com/kynan/nbstripout + rev: "0.8.1" + hooks: + - id: nbstripout + args: + [ + "--drop-empty-cells", + "--extra-keys", + "metadata.kernelspec metadata.language_info", + ] -- repo: https://github.com/mgedmin/check-manifest - rev: "0.50" - hooks: - - id: check-manifest - stages: [manual] + - repo: https://github.com/mgedmin/check-manifest + rev: "0.50" + hooks: + - id: check-manifest + stages: [manual] -- repo: https://github.com/codespell-project/codespell - rev: "v2.3.0" - hooks: - - id: codespell - args: ["-L", 
"nd,unparseable,compiletime,livetime,fom,puls"] + - repo: https://github.com/codespell-project/codespell + rev: "v2.3.0" + hooks: + - id: codespell + args: ["-L", "nd,unparseable,compiletime,livetime,fom,puls"] -- repo: https://github.com/shellcheck-py/shellcheck-py - rev: "v0.10.0.1" - hooks: - - id: shellcheck + - repo: https://github.com/shellcheck-py/shellcheck-py + rev: "v0.10.0.1" + hooks: + - id: shellcheck -- repo: https://github.com/pre-commit/pygrep-hooks - rev: "v1.10.0" - hooks: - - id: rst-backticks - - id: rst-directive-colons - - id: rst-inline-touching-normal + - repo: https://github.com/pre-commit/pygrep-hooks + rev: "v1.10.0" + hooks: + - id: rst-backticks + - id: rst-directive-colons + - id: rst-inline-touching-normal -- repo: https://github.com/pre-commit/mirrors-prettier - rev: "v4.0.0-alpha.8" - hooks: - - id: prettier - types_or: [json] + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v4.0.0-alpha.8" + hooks: + - id: prettier + types_or: [yaml, markdown, json] -- repo: https://github.com/snakemake/snakefmt - rev: v0.10.2 - hooks: - - id: snakefmt - files: Snakefile*|\.smk + - repo: https://github.com/snakemake/snakefmt + rev: v0.10.2 + hooks: + - id: snakefmt + files: Snakefile*|\.smk diff --git a/LICENSE.md b/LICENSE.md index 35d8ee3..b07a92a 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -2,10 +2,10 @@ The legend-dataflow package is licensed under the MIT "Expat" License: > Copyright (c) 2021: > -> Matteo Agostini -> Oliver Schulz -> George Marshall -> Luigi Pertoldi +> Matteo Agostini +> Oliver Schulz +> George Marshall +> Luigi Pertoldi > > Permission is hereby granted, free of charge, to any person obtaining a copy > of this software and associated documentation files (the "Software"), to deal @@ -24,4 +24,3 @@ The legend-dataflow package is licensed under the MIT "Expat" License: > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > SOFTWARE. 
-> diff --git a/config-lngs.yaml b/config-lngs.yaml index 901cac8..971399c 100644 --- a/config-lngs.yaml +++ b/config-lngs.yaml @@ -1,15 +1,18 @@ setups: l200: paths: - sandbox_path: '' - tier_daq: $_/generated/tier/daq - tier_raw_blind: '' + sandbox_path: /data1/shared/l200-p13/sandbox + tier_daq: $_/../daq/generated/tier/daq + tier_raw_blind: "" + workflow: $_/workflow + metadata: $_/inputs config: $_/inputs/dataprod/config par_overwrite: $_/inputs/dataprod/overrides chan_map: $_/inputs/hardware/configuration detector_db: $_/inputs/hardware/detectors + tier: $_/generated/tier tier_raw: $_/generated/tier/raw tier_tcm: $_/generated/tier/tcm @@ -22,6 +25,7 @@ setups: tier_pan: $_/generated/tier/pan tier_pet: $_/generated/tier/pet tier_skm: $_/generated/tier/skm + par: $_/generated/par par_raw: $_/generated/par/raw par_tcm: $_/generated/par/tcm @@ -31,41 +35,47 @@ setups: par_psp: $_/generated/par/psp par_pht: $_/generated/par/pht par_pet: $_/generated/par/pet + plt: $_/generated/plt log: $_/generated/log + tmp_plt: $_/generated/tmp/plt tmp_log: $_/generated/tmp/log tmp_filelists: $_/generated/tmp/filelists tmp_par: $_/generated/tmp/par + src: $_/software/python/src install: $_/software/python/install cache: $_/software/python/cache + table_format: raw: ch{ch:07d}/raw dsp: ch{ch:07d}/dsp psp: ch{ch:07d}/dsp hit: ch{ch:07d}/hit pht: ch{ch:07d}/hit - evt: '{grp}/evt' - pet: '{grp}/evt' - skm: '{grp}/skm' + evt: "{grp}/evt" + pet: "{grp}/evt" + skm: "{grp}/skm" tcm: hardware_tcm_1 + execenv: cmd: apptainer run arg: /data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif env: - HDF5_USE_FILE_LOCKING: 'False' - LGDO_BOUNDSCHECK: 'false' - DSPEED_BOUNDSCHECK: 'false' - PYGAMA_PARALLEL: 'false' - PYGAMA_FASTMATH: 'false' + PRODENV: $PRODENV + HDF5_USE_FILE_LOCKING: "False" + LGDO_BOUNDSCHECK: "false" + DSPEED_BOUNDSCHECK: "false" + PYGAMA_PARALLEL: "false" + PYGAMA_FASTMATH: "false" + DISABLE_TQDM: "True" + pkg_versions: - pygama: pygama==2.0.3 - pylegendmeta: pylegendmeta==1.1.0 - dspeed: dspeed==1.6.1 - legend-pydataobj: legend-pydataobj==1.9.0 - legend-daq2lh5: legend-daq2lh5==1.2.2 - tensorflow: tensorflow==2.17 - keras: keras==3.6.0 - jax: jax==0.4.30 + - pygama==2.0.* + - pylegendmeta==1.2.0a2 + - dspeed==1.6.* + - legend-pydataobj>=1.11.4 + - legend-daq2lh5==1.4.* + meta_version: v0.5.7 diff --git a/config-nersc.yaml b/config-nersc.yaml index 88b5156..f94d8ff 100644 --- a/config-nersc.yaml +++ b/config-nersc.yaml @@ -1,9 +1,9 @@ setups: l200: paths: - sandbox_path: '' + sandbox_path: "" tier_daq: $_/generated/tier/daq - tier_raw_blind: '' + tier_raw_blind: "" workflow: $_/workflow metadata: $_/inputs config: $_/inputs/dataprod/config @@ -44,19 +44,19 @@ setups: psp: ch{ch:07d}/dsp hit: ch{ch:07d}/hit pht: ch{ch:07d}/hit - evt: '{grp}/evt' - pet: '{grp}/evt' - skm: '{grp}/skm' + evt: "{grp}/evt" + pet: "{grp}/evt" + skm: "{grp}/skm" tcm: hardware_tcm_1 execenv: cmd: shifter - arg: ' --image legendexp/legend-base:latest' + arg: " --image legendexp/legend-base:latest" env: - HDF5_USE_FILE_LOCKING: 'FALSE' - LGDO_BOUNDSCHECK: 'false' - DSPEED_BOUNDSCHECK: 'false' - PYGAMA_PARALLEL: 'false' - PYGAMA_FASTMATH: 'false' + HDF5_USE_FILE_LOCKING: "FALSE" + LGDO_BOUNDSCHECK: "false" + DSPEED_BOUNDSCHECK: "false" + PYGAMA_PARALLEL: "false" + PYGAMA_FASTMATH: "false" pkg_versions: pygama: pygama==2.0.3 pylegendmeta: pylegendmeta==0.10.2 diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index c4e249d..5fce213 100644 --- 
a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -8,7 +8,8 @@ import subprocess from pathlib import Path -import yaml +import dbetto +from packaging.requirements import Requirement def dataprod() -> None: @@ -17,7 +18,7 @@ def dataprod() -> None: .. code-block:: console $ dataprod --help - $ dataprod load --help # help section for a specific sub-command + $ dataprod exec --help # help section for a specific sub-command """ parser = argparse.ArgumentParser( @@ -36,14 +37,14 @@ def dataprod() -> None: ) parser_install.set_defaults(func=install) - parser_load = subparsers.add_parser( - "load", help="load data production environment and execute a given command" + parser_exec = subparsers.add_parser( + "exec", help="load data production environment and execute a given command" ) - parser_load.add_argument("config_file", help="production cycle configuration file", type=str) - parser_load.add_argument( + parser_exec.add_argument("config_file", help="production cycle configuration file", type=str) + parser_exec.add_argument( "command", help="command to run within the container", type=str, nargs="+" ) - parser_load.set_defaults(func=load) + parser_exec.set_defaults(func=cmdexec) args = parser.parse_args() args.func(args) @@ -52,13 +53,14 @@ def dataprod() -> None: def install(args) -> None: """ This function installs user software in the data production environment. - The software packages should be specified in the config.yaml file with the format: + The software packages should be specified in the config.yaml file with the + format: ```yaml setups: - l200: - pkg_versions: - package_name: package_version + l200: + pkg_versions: + - python_package_spec ``` """ print(args.config_file) @@ -67,14 +69,13 @@ def install(args) -> None: raise RuntimeError(msg) config_file_dir = Path(args.config_file).resolve().parent - with Path(args.config_file).open() as r: - config_dic = yaml.safe_load(r) + config_dic = dbetto.AttrsDict(dbetto.utils.load_dict(args.config_file)) - exec_cmd = config_dic["setups"]["l200"]["execenv"]["cmd"] - exec_arg = config_dic["setups"]["l200"]["execenv"]["arg"] - path_src = config_dic["setups"]["l200"]["paths"]["src"] - path_install = config_dic["setups"]["l200"]["paths"]["install"] - path_cache = config_dic["setups"]["l200"]["paths"]["cache"] + exec_cmd = config_dic.setups.l200.execenv.cmd + exec_arg = config_dic.setups.l200.execenv.arg + path_src = config_dic.setups.l200.paths.src + path_install = config_dic.setups.l200.paths.install + path_cache = config_dic.setups.l200.paths.cache exec_cmd = string.Template(exec_cmd).substitute({"_": config_file_dir}) exec_arg = string.Template(exec_arg).substitute({"_": config_file_dir}) @@ -87,11 +88,12 @@ def install(args) -> None: shutil.rmtree(path_cache) pkg_list = "" - for pkg, pkg_version in config_dic["setups"]["l200"]["pkg_versions"].items(): + for spec in config_dic.setups.l200.pkg_versions: + pkg = Requirement(spec).name if (path_src / pkg).exists(): pkg_list += f" '{path_src / pkg}'" else: - pkg_list += f" '{pkg_version}'" + pkg_list += f" '{spec}'" cmd_expr = ( f"PYTHONUSERBASE={path_install} PIP_CACHE_DIR={path_cache} " @@ -101,23 +103,22 @@ def install(args) -> None: os.system(cmd_expr) -def load(args) -> None: +def cmdexec(args) -> None: """ This function loads the data production environment and executes a given command. 
""" if not Path(args.config_file).is_file(): - print("Error: config file does not exist") - exit() + msg = "config file is not a regular file" + raise RuntimeError(msg) config_file_dir = Path(args.config_file).resolve().parent - with Path(args.config_file).open() as r: - config_dic = yaml.safe_load(r) + config_dic = dbetto.AttrsDict(dbetto.utils.load_dict(args.config_file)) - exec_cmd = config_dic["setups"]["l200"]["execenv"]["cmd"] - exec_arg = config_dic["setups"]["l200"]["execenv"]["arg"] - env_vars = config_dic["setups"]["l200"]["execenv"]["env"] - path_install = config_dic["setups"]["l200"]["paths"]["install"] + exec_cmd = config_dic.setups.l200.execenv.cmd + exec_arg = config_dic.setups.l200.execenv.arg + env_vars = config_dic.setups.l200.execenv.env + path_install = config_dic.setups.l200.paths.install exec_cmd = string.Template(exec_cmd).substitute({"_": config_file_dir}) exec_arg = string.Template(exec_arg).substitute({"_": config_file_dir}) From 48a35e04f3bcfc51c9540eeb273cc865b8b72d39 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 29 Jan 2025 12:01:24 +0100 Subject: [PATCH 060/101] really fix the RTD build --- .readthedocs.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index ca8910f..103c066 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -17,6 +17,8 @@ build: --module-first --force --output-dir docs/source/api - scripts + workflow/scripts + workflow/src + workflow/rules - .venv/bin/python -m sphinx -T -b html -d docs/_build/doctrees -D language=en docs/source $READTHEDOCS_OUTPUT/html From 2af22db5cd320ba1f53bed688c6dc48f0cd7466e Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 29 Jan 2025 12:30:05 +0100 Subject: [PATCH 061/101] fix all package imports --- workflow/Snakefile | 65 ++++++++++--------------- workflow/Snakefile-build-raw | 41 ++++++---------- workflow/rules/ann.smk | 2 +- workflow/rules/blinding_calibration.smk | 2 +- workflow/rules/blinding_check.smk | 2 +- workflow/rules/chanlist_gen.smk | 6 +-- workflow/rules/common.smk | 40 +++++++-------- workflow/rules/dsp.smk | 8 +-- workflow/rules/dsp_pars_geds.smk | 4 +- workflow/rules/evt.smk | 4 +- workflow/rules/filelist_gen.smk | 28 +++++------ workflow/rules/hit.smk | 6 +-- workflow/rules/main.smk | 2 +- workflow/rules/pht.smk | 8 +-- workflow/rules/pht_fast.smk | 8 +-- workflow/rules/psp.smk | 6 +-- workflow/rules/psp_pars_geds.smk | 8 +-- workflow/rules/qc_phy.smk | 8 +-- workflow/rules/raw.smk | 6 +-- workflow/rules/skm.smk | 2 +- workflow/rules/tcm.smk | 2 +- 21 files changed, 112 insertions(+), 146 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index ba839be..011cb05 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -17,43 +17,28 @@ import glob from datetime import datetime from collections import OrderedDict import logging -from pylegendmeta import LegendMetadata - -import scripts.library as lib -from scripts.library.pars_loading import ParsCatalog -from scripts.library.patterns import get_pattern_tier -from scripts.library.utils import ( - subst_vars_in_snakemake_config, - runcmd, - config_path, - chan_map_path, - filelist_path, - metadata_path, - tmp_log_path, - pars_path, - det_status_path, -) - -# Set with `snakemake --configfile=/path/to/your/config.json` -# configfile: "have/to/specify/path/to/your/config.json" - -subst_vars_in_snakemake_config(workflow, config) + +from legendmeta import LegendMetadata +from legenddataflow import CalGrouping +from legenddataflow import utils + 
+utils.subst_vars_in_snakemake_config(workflow, config) check_in_cycle = True setup = config["setups"]["l200"] -configs = config_path(setup) -chan_maps = chan_map_path(setup) -meta = metadata_path(setup) -det_status = det_status_path(setup) -swenv = runcmd(setup) +configs = utils.config_path(setup) +chan_maps = utils.chan_map_path(setup) +meta = utils.metadata_path(setup) +det_status = utils.det_status_path(setup) +swenv = utils.runcmd(setup) basedir = workflow.basedir # wait for new pylegendmeta release # if not Path(meta).exists(): # meta = LegendMetadata() -# meta.checkout(config["setups"]["l200"]["meta_version"]) +# meta.checkout(config["setups"]["l200"]["legend_metadata_version"]) -part = lib.CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") +part = CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") wildcard_constraints: @@ -96,7 +81,7 @@ onstart: shell('{swenv} python3 -B -c "import ' + pkg + '"') # Log parameter catalogs in validity.jsonl files - hit_par_cat_file = Path(pars_path(setup)) / "hit" / "validity.yaml" + hit_par_cat_file = Path(utils.pars_path(setup)) / "hit" / "validity.yaml" if hit_par_cat_file.is_file(): hit_par_cat_file.unlink() try: @@ -105,7 +90,7 @@ onstart: except NameError: print("No hit parameter catalog found") - pht_par_cat_file = Path(pars_path(setup)) / "pht" / "validity.yaml" + pht_par_cat_file = Path(utils.pars_path(setup)) / "pht" / "validity.yaml" if pht_par_cat_file.is_file(): pht_par_cat_file.unlink() try: @@ -114,7 +99,7 @@ onstart: except NameError: print("No pht parameter catalog found") - dsp_par_cat_file = Path(pars_path(setup)) / "dsp" / "validity.yaml" + dsp_par_cat_file = Path(utils.pars_path(setup)) / "dsp" / "validity.yaml" if dsp_par_cat_file.is_file(): dsp_par_cat_file.unlink() try: @@ -123,7 +108,7 @@ onstart: except NameError: print("No dsp parameter catalog found") - psp_par_cat_file = Path(pars_path(setup)) / "psp" / "validity.yaml" + psp_par_cat_file = Path(utils.pars_path(setup)) / "psp" / "validity.yaml" if psp_par_cat_file.is_file(): psp_par_cat_file.unlink() try: @@ -155,24 +140,24 @@ onsuccess: os.remove(file) # remove filelists - files = glob.glob(os.path.join(filelist_path(setup), "*")) + files = glob.glob(os.path.join(utils.filelist_path(setup), "*")) for file in files: if os.path.isfile(file): os.remove(file) - if os.path.exists(filelist_path(setup)): - os.rmdir(filelist_path(setup)) + if os.path.exists(utils.filelist_path(setup)): + os.rmdir(utils.filelist_path(setup)) # remove logs - files = glob.glob(os.path.join(tmp_log_path(setup), "*", "*.log")) + files = glob.glob(os.path.join(utils.tmp_log_path(setup), "*", "*.log")) for file in files: if os.path.isfile(file): os.remove(file) - dirs = glob.glob(os.path.join(tmp_log_path(setup), "*")) + dirs = glob.glob(os.path.join(utils.tmp_log_path(setup), "*")) for d in dirs: if os.path.isdir(d): os.rmdir(d) - if os.path.exists(tmp_log_path(setup)): - os.rmdir(tmp_log_path(setup)) + if os.path.exists(utils.tmp_log_path(setup)): + os.rmdir(utils.tmp_log_path(setup)) rule gen_filelist: @@ -192,6 +177,6 @@ rule gen_filelist: analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), + temp(Path(utils.filelist_path(setup)) / "{label}-{tier}.filelist"), script: "scripts/write_filelist.py" diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw index 0dec789..763cb8a 100644 --- a/workflow/Snakefile-build-raw +++ b/workflow/Snakefile-build-raw @@ -8,39 +8,25 @@ to the blinded raw 
data. It handles: import os, sys from pathlib import Path -from scripts.util import patterns as patt -from scripts.util.utils import ( - subst_vars_in_snakemake_config, - runcmd, - config_path, - chan_map_path, - filelist_path, - pars_path, - metadata_path, - det_status_path, -) -from scripts.util.create_pars_keylist import ParsKeyResolve +from legenddataflow import patterns as patt +from legenddataflow import utils, ParsKeyResolve check_in_cycle = True -# Set with `snakemake --configfile=/path/to/your/config.json` -# configfile: "have/to/specify/path/to/your/config.json" - -subst_vars_in_snakemake_config(workflow, config) +utils.subst_vars_in_snakemake_config(workflow, config) setup = config["setups"]["l200"] -configs = config_path(setup) -chan_maps = chan_map_path(setup) -swenv = runcmd(setup) -meta = metadata_path(setup) -det_status = det_status_path(setup) +configs = utils.config_path(setup) +chan_maps = utils.chan_map_path(setup) +swenv = utils.runcmd(setup) +meta = utils.metadata_path(setup) +det_status = utils.det_status_path(setup) basedir = workflow.basedir # wait for new pylegendmeta release # if not Path(meta).exists(): # meta = LegendMetadata() -# meta.checkout(config["setups"]["l200"]["meta_version"]) -s +# meta.checkout(config["setups"]["l200"]["legend_metadata_version"]) wildcard_constraints: @@ -70,7 +56,7 @@ onstart: shell('{swenv} python3 -B -c "import daq2lh5 "') - raw_par_cat_file = Path(pars_path(setup)) / "raw" / "validity.yaml" + raw_par_cat_file = Path(utils.pars_path(setup)) / "raw" / "validity.yaml" if raw_par_cat_file.is_file(): raw_par_cat_file.unlink() try: @@ -83,7 +69,7 @@ onstart: onsuccess: print("Workflow finished, no error") shell("rm *.gen || true") - shell(f"rm {filelist_path(setup)}/* || true") + shell(f"rm {utils.filelist_path(setup)}/* || true") rule gen_filelist: @@ -96,7 +82,7 @@ rule gen_filelist: analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - temp(Path(filelist_path(setup)) / "{label}-{tier}.filelist"), + temp(Path(utils.filelist_path(setup)) / "{label}-{tier}.filelist"), script: "scripts/write_filelist.py" @@ -112,3 +98,6 @@ rule sort_data: patt.get_pattern_tier_daq(setup, extension="fcio"), shell: "mv {input} {output}" + + +# vim: filetype=snakemake diff --git a/workflow/rules/ann.smk b/workflow/rules/ann.smk index f1a47cd..2565514 100644 --- a/workflow/rules/ann.smk +++ b/workflow/rules/ann.smk @@ -4,7 +4,7 @@ to apply the ann and risetime cuts for psd. """ -from scripts.library.patterns import ( +from legenddataflow.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars, diff --git a/workflow/rules/blinding_calibration.smk b/workflow/rules/blinding_calibration.smk index d28072f..b8076d7 100644 --- a/workflow/rules/blinding_calibration.smk +++ b/workflow/rules/blinding_calibration.smk @@ -4,7 +4,7 @@ Snakemake rules for calibrating daq energy for blinding. Two steps: - combining all channels into single par file """ -from scripts.library.patterns import ( +from legenddataflow.patterns import ( get_pattern_pars, get_pattern_plts, get_pattern_pars_tmp_channel, diff --git a/workflow/rules/blinding_check.smk b/workflow/rules/blinding_check.smk index e556abb..b142c19 100644 --- a/workflow/rules/blinding_check.smk +++ b/workflow/rules/blinding_check.smk @@ -4,7 +4,7 @@ Snakemake rules for checking blinding. 
Two steps: - combining all channel check files into single check file """ -from scripts.library.patterns import ( +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/workflow/rules/chanlist_gen.smk b/workflow/rules/chanlist_gen.smk index 68c33e4..4e46f13 100644 --- a/workflow/rules/chanlist_gen.smk +++ b/workflow/rules/chanlist_gen.smk @@ -4,12 +4,12 @@ import os import random import re -from scripts.library.FileKey import ChannelProcKey -from scripts.library.patterns import ( +from legenddataflow.FileKey import ChannelProcKey +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, ) -from scripts.library.utils import filelist_path, runcmd +from legenddataflow.utils import filelist_path, runcmd def get_par_chanlist( diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 4f99d5c..17571e3 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -3,16 +3,10 @@ Helper functions for running data production """ from pathlib import Path -from scripts.library.patterns import ( - get_pattern_tier_daq_unsorted, - get_pattern_tier_daq, - get_pattern_tier, - par_overwrite_path, - get_pars_path, -) -from scripts.library import ProcessingFileKey +from legenddataflow import patterns as patt +from legenddataflow import ProcessingFileKey from dbetto.catalog import Catalog -from scripts.library import utils +from legenddataflow import utils def ro(path): @@ -22,14 +16,14 @@ def ro(path): def get_blinding_curve_file(wildcards): """func to get the blinding calibration curves from the overrides""" par_files = Catalog.get_files( - Path(par_overwrite_path(setup)) / "raw" / "validity.yaml", + Path(patt.par_overwrite_path(setup)) / "raw" / "validity.yaml", wildcards.timestamp, ) if isinstance(par_files, str): - return str(Path(par_overwrite_path(setup)) / "raw" / par_files) + return str(Path(patt.par_overwrite_path(setup)) / "raw" / par_files) else: return [ - str(Path(par_overwrite_path(setup)) / "raw" / par_file) + str(Path(patt.par_overwrite_path(setup)) / "raw" / par_file) for par_file in par_files ] @@ -37,12 +31,14 @@ def get_blinding_curve_file(wildcards): def get_blinding_check_file(wildcards): """func to get the right blinding check file""" par_files = Catalog.get_files( - Path(get_pars_path(setup, "raw")) / "validity.yaml", wildcards.timestamp + Path(patt.get_pars_path(setup, "raw")) / "validity.yaml", wildcards.timestamp ) if isinstance(par_files, str): - return Path(get_pars_path(setup, "raw")) / par_files + return Path(patt.get_pars_path(setup, "raw")) / par_files else: - return [Path(get_pars_path(setup, "raw")) / par_file for par_file in par_files] + return [ + Path(patt.get_pars_path(setup, "raw")) / par_file for par_file in par_files + ] def set_last_rule_name(workflow, new_name): @@ -71,19 +67,19 @@ def set_last_rule_name(workflow, new_name): def get_input_par_file(wildcards, tier, name): - par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" + par_overwrite_file = Path(patt.par_overwrite_path(setup)) / tier / "validity.yaml" pars_files_overwrite = Catalog.get_files( par_overwrite_file, wildcards.timestamp, ) for pars_file in pars_files_overwrite: if name in str(pars_file): - return Path(par_overwrite_path(setup)) / tier / pars_file + return Path(patt.par_overwrite_path(setup)) / tier / pars_file raise ValueError(f"Could not find model in {pars_files_overwrite}") def get_overwrite_file(tier, wildcards=None, 
timestamp=None, name=None): - par_overwrite_file = Path(par_overwrite_path(setup)) / tier / "validity.yaml" + par_overwrite_file = Path(patt.par_overwrite_path(setup)) / tier / "validity.yaml" if timestamp is not None: pars_files_overwrite = Catalog.get_files( par_overwrite_file, @@ -101,7 +97,7 @@ def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): out_files = [] for pars_file in pars_files_overwrite: if fullname in str(pars_file): - out_files.append(Path(par_overwrite_path(setup)) / tier / pars_file) + out_files.append(Path(patt.par_overwrite_path(setup)) / tier / pars_file) if len(out_files) == 0: raise ValueError(f"Could not find name in {pars_files_overwrite}") else: @@ -113,8 +109,8 @@ def get_search_pattern(tier): This func gets the search pattern for the relevant tier passed. """ if tier == "daq": - return get_pattern_tier_daq_unsorted(setup, extension="*") + return patt.get_pattern_tier_daq_unsorted(setup, extension="*") elif tier == "raw": - return get_pattern_tier_daq(setup, extension="*") + return patt.get_pattern_tier_daq(setup, extension="*") else: - return get_pattern_tier(setup, "raw", check_in_cycle=False) + return patt.get_pattern_tier(setup, "raw", check_in_cycle=False) diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index f4f8487..501ed52 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -4,11 +4,11 @@ Snakemake rules for processing dsp tier. - running dsp over all channels using par file """ -from scripts.library.pars_loading import ParsCatalog -from scripts.library.create_pars_keylist import ParsKeyResolve +from legenddataflow.pars_loading import ParsCatalog +from legenddataflow.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.library.create_pars_keylist import ParsKeyResolve -from scripts.library.patterns import ( +from legenddataflow.create_pars_keylist import ParsKeyResolve +from legenddataflow.patterns import ( get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index 7f16c9e..f526d6b 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -4,8 +4,8 @@ Snakemake rules for building dsp pars for HPGes, before running build_dsp() - extraction of energy filter parameters and charge trapping correction for each channel from cal data """ -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.patterns import ( +from legenddataflow.create_pars_keylist import pars_key_resolve +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/workflow/rules/evt.smk b/workflow/rules/evt.smk index 4e96a85..d14b8cb 100644 --- a/workflow/rules/evt.smk +++ b/workflow/rules/evt.smk @@ -2,8 +2,8 @@ Snakemake rules for processing evt tier. 
""" -from scripts.library.pars_loading import ParsCatalog -from scripts.library.patterns import ( +from legenddataflow.pars_loading import ParsCatalog +from legenddataflow.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars, diff --git a/workflow/rules/filelist_gen.smk b/workflow/rules/filelist_gen.smk index 5d1f928..32d6175 100644 --- a/workflow/rules/filelist_gen.smk +++ b/workflow/rules/filelist_gen.smk @@ -2,12 +2,8 @@ import glob import json, yaml from pathlib import Path -from scripts.library.FileKey import FileKey, run_grouper -from scripts.library.patterns import ( - get_pattern_tier, - get_pattern_tier_raw_blind, - get_pattern_tier_daq, -) +from legenddataflow.FileKey import FileKey, run_grouper +from legenddataflow import patterns as patt concat_datatypes = ["phy"] concat_tiers = ["skm", "pet_concat", "evt_concat"] @@ -116,15 +112,15 @@ def get_pattern(setup, tier): as only phy files are taken to skm others are only taken to pet """ if tier == "blind": - fn_pattern = get_pattern_tier(setup, "raw", check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(setup, "raw", check_in_cycle=False) elif tier in ("skm", "pet_concat"): - fn_pattern = get_pattern_tier(setup, "pet", check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(setup, "pet", check_in_cycle=False) elif tier == "evt_concat": - fn_pattern = get_pattern_tier(setup, "evt", check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(setup, "evt", check_in_cycle=False) elif tier == "daq": - fn_pattern = get_pattern_tier_daq(setup, extension="{ext}") + fn_pattern = patt.get_pattern_tier_daq(setup, extension="{ext}") else: - fn_pattern = get_pattern_tier(setup, tier, check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(setup, tier, check_in_cycle=False) return fn_pattern @@ -132,15 +128,15 @@ def concat_phy_filenames(setup, phy_filenames, tier): """ This function concatenates the files from the same run together """ - fn_pattern = get_pattern(setup, tier) + fn_pattern = patt.get_pattern(setup, tier) # group files by run - sorted_phy_filenames = run_grouper(phy_filenames) + sorted_phy_filenames = patt.run_grouper(phy_filenames) phy_filenames = [] for run in sorted_phy_filenames: key = FileKey.get_filekey_from_pattern(run[0], fn_pattern) out_key = FileKey.get_path_from_filekey( - key, get_pattern_tier(setup, tier, check_in_cycle=False) + key, patt.get_pattern_tier(setup, tier, check_in_cycle=False) )[0] phy_filenames.append(out_key) @@ -181,11 +177,11 @@ def build_filelist( else: if tier == "blind" and _key.datatype in blind_datatypes: filename = FileKey.get_path_from_filekey( - _key, get_pattern_tier_raw_blind(setup) + _key, patt.get_pattern_tier_raw_blind(setup) ) elif tier == "skm": filename = FileKey.get_path_from_filekey( - _key, get_pattern_tier(setup, "pet", check_in_cycle=False) + _key, patt.get_pattern_tier(setup, "pet", check_in_cycle=False) ) elif tier == "daq": filename = FileKey.get_path_from_filekey( diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index 5ea14ff..0af7590 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -6,10 +6,10 @@ Snakemake rules for processing hit tier. 
This is done in 4 steps: - running build hit over all channels using par file """ -from scripts.library.pars_loading import ParsCatalog -from scripts.library.create_pars_keylist import ParsKeyResolve +from legenddataflow.pars_loading import ParsCatalog +from legenddataflow.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.library.patterns import ( +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/workflow/rules/main.smk b/workflow/rules/main.smk index f227f17..e0d886e 100644 --- a/workflow/rules/main.smk +++ b/workflow/rules/main.smk @@ -1,6 +1,6 @@ import os from datetime import datetime -from scripts.library.utils import ( +from legenddataflow.utils import ( filelist_path, log_path, tmp_par_path, diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index 239e3c5..27e4f81 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -6,11 +6,11 @@ Snakemake rules for processing pht (partition hit) tier data. This is done in 4 - running build hit over all channels using par file """ -from scripts.library.pars_loading import ParsCatalog -from scripts.library.create_pars_keylist import ParsKeyResolve +from legenddataflow.pars_loading import ParsCatalog +from legenddataflow.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.library.utils import filelist_path, set_last_rule_name -from scripts.library.patterns import ( +from legenddataflow.utils import filelist_path, set_last_rule_name +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/workflow/rules/pht_fast.smk b/workflow/rules/pht_fast.smk index b177f12..75d8e7e 100644 --- a/workflow/rules/pht_fast.smk +++ b/workflow/rules/pht_fast.smk @@ -1,7 +1,7 @@ -from scripts.library.pars_loading import ParsCatalog -from scripts.library.create_pars_keylist import ParsKeyResolve -from scripts.library.utils import filelist_path, set_last_rule_name -from scripts.library.patterns import ( +from legenddataflow.pars_loading import ParsCatalog +from legenddataflow.create_pars_keylist import ParsKeyResolve +from legenddataflow.utils import filelist_path, set_last_rule_name +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index dc0cfe5..d55fbcc 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -4,10 +4,10 @@ Snakemake rules for processing psp (partition dsp) tier data. - running build hit over all channels using par file """ -from scripts.library.pars_loading import ParsCatalog -from scripts.library.create_pars_keylist import ParsKeyResolve +from legenddataflow.pars_loading import ParsCatalog +from legenddataflow.create_pars_keylist import ParsKeyResolve from pathlib import Path -from scripts.library.patterns import ( +from legenddataflow.patterns import ( get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk index 8d3d2c8..9e14cad 100644 --- a/workflow/rules/psp_pars_geds.smk +++ b/workflow/rules/psp_pars_geds.smk @@ -4,10 +4,10 @@ Snakemake rules for processing psp (partition dsp) tier data. 
- extraction of psd calibration parameters and partition level energy fitting for each channel over whole partition from cal data """ -from scripts.util.pars_loading import pars_catalog -from scripts.util.create_pars_keylist import pars_key_resolve -from scripts.util.utils import par_psp_path, par_dsp_path, set_last_rule_name -from scripts.util.patterns import ( +from legenddataflow.pars_loading import pars_catalog +from legenddataflow.create_pars_keylist import pars_key_resolve +from legenddataflow.utils import par_psp_path, par_dsp_path, set_last_rule_name +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/workflow/rules/qc_phy.smk b/workflow/rules/qc_phy.smk index d7a10f4..982ab4e 100644 --- a/workflow/rules/qc_phy.smk +++ b/workflow/rules/qc_phy.smk @@ -1,7 +1,7 @@ -from scripts.library.pars_loading import ParsCatalog -from scripts.library.create_pars_keylist import ParsKeyResolve -from scripts.library.utils import filelist_path, set_last_rule_name -from scripts.library.patterns import ( +from legenddataflow.pars_loading import ParsCatalog +from legenddataflow.create_pars_keylist import ParsKeyResolve +from legenddataflow.utils import filelist_path, set_last_rule_name +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, diff --git a/workflow/rules/raw.smk b/workflow/rules/raw.smk index 17d1e3b..f647095 100644 --- a/workflow/rules/raw.smk +++ b/workflow/rules/raw.smk @@ -1,12 +1,12 @@ -from scripts.library.patterns import ( +from legenddataflow.patterns import ( get_pattern_tier_daq_unsorted, get_pattern_tier_daq, get_pattern_tier, get_pattern_log, get_pattern_tier_raw_blind, ) -from scripts.library.utils import set_last_rule_name -from scripts.library.create_pars_keylist import ParsKeyResolve +from legenddataflow.utils import set_last_rule_name +from legenddataflow.create_pars_keylist import ParsKeyResolve raw_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], diff --git a/workflow/rules/skm.smk b/workflow/rules/skm.smk index 404b81b..d3c5d51 100644 --- a/workflow/rules/skm.smk +++ b/workflow/rules/skm.smk @@ -2,7 +2,7 @@ Snakemake rules for processing skm tier. """ -from scripts.library.patterns import ( +from legenddataflow.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars, diff --git a/workflow/rules/tcm.smk b/workflow/rules/tcm.smk index 941455d..6fa85a9 100644 --- a/workflow/rules/tcm.smk +++ b/workflow/rules/tcm.smk @@ -2,7 +2,7 @@ Snakemake file containing the rules for generating the tcm """ -from scripts.library.patterns import ( +from legenddataflow.patterns import ( get_pattern_tier, get_pattern_log, get_pattern_pars_tmp_channel, From cf9e6bd79bbc9c59a60976c180b41b88aa529c1d Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 29 Jan 2025 13:57:03 +0100 Subject: [PATCH 062/101] update profiles --- .gitignore | 2 ++ config-lngs.yaml | 3 ++- workflow/profiles/default/config.yaml | 1 - workflow/profiles/{build-raw => lngs-build-raw}/config.yaml | 2 +- workflow/profiles/{legend-data => lngs}/config.yaml | 2 +- 5 files changed, 6 insertions(+), 4 deletions(-) rename workflow/profiles/{build-raw => lngs-build-raw}/config.yaml (84%) rename workflow/profiles/{legend-data => lngs}/config.yaml (83%) diff --git a/.gitignore b/.gitignore index a904f40..b4586b6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +.snakemake + # New additions go at the top! 
*.c .DS_Store diff --git a/config-lngs.yaml b/config-lngs.yaml index 971399c..b14c913 100644 --- a/config-lngs.yaml +++ b/config-lngs.yaml @@ -11,6 +11,7 @@ setups: config: $_/inputs/dataprod/config par_overwrite: $_/inputs/dataprod/overrides chan_map: $_/inputs/hardware/configuration + detector_status: $_/inputs/datasets detector_db: $_/inputs/hardware/detectors tier: $_/generated/tier @@ -78,4 +79,4 @@ setups: - legend-pydataobj>=1.11.4 - legend-daq2lh5==1.4.* - meta_version: v0.5.7 + legend_metadata_version: v0.5.7 diff --git a/workflow/profiles/default/config.yaml b/workflow/profiles/default/config.yaml index 53a11cd..ba92572 100644 --- a/workflow/profiles/default/config.yaml +++ b/workflow/profiles/default/config.yaml @@ -1,5 +1,4 @@ cores: all -configfile: config.json snakefile: ./workflow/Snakefile keep-going: true rerun-incomplete: true diff --git a/workflow/profiles/build-raw/config.yaml b/workflow/profiles/lngs-build-raw/config.yaml similarity index 84% rename from workflow/profiles/build-raw/config.yaml rename to workflow/profiles/lngs-build-raw/config.yaml index 4525deb..73b5cb5 100644 --- a/workflow/profiles/build-raw/config.yaml +++ b/workflow/profiles/lngs-build-raw/config.yaml @@ -3,7 +3,7 @@ restart-times: 2 max-jobs-per-second: 1 resources: - mem_swap=3500 -configfile: config.json +configfile: config-lngs.yaml snakefile: ./workflow/Snakefile-build-raw keep-going: true rerun-incomplete: true diff --git a/workflow/profiles/legend-data/config.yaml b/workflow/profiles/lngs/config.yaml similarity index 83% rename from workflow/profiles/legend-data/config.yaml rename to workflow/profiles/lngs/config.yaml index 364bdb1..1f27969 100644 --- a/workflow/profiles/legend-data/config.yaml +++ b/workflow/profiles/lngs/config.yaml @@ -3,7 +3,7 @@ restart-times: 2 max-jobs-per-second: 1 resources: - mem_swap=3500 -configfile: config.json +configfile: config-lngs.yaml snakefile: ./workflow/Snakefile keep-going: true rerun-incomplete: true From f4bbffa6091d790501bf025cf3e2ab39819d8bf0 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 29 Jan 2025 13:57:23 +0100 Subject: [PATCH 063/101] switch to subrocess library --- workflow/src/legenddataflow/execenv.py | 37 ++++++++++++++++++++------ 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index 5fce213..fa91400 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -87,20 +87,41 @@ def install(args) -> None: shutil.rmtree(path_install) shutil.rmtree(path_cache) - pkg_list = "" + pkg_list = [] for spec in config_dic.setups.l200.pkg_versions: pkg = Requirement(spec).name if (path_src / pkg).exists(): - pkg_list += f" '{path_src / pkg}'" + pkg_list.append(str(path_src / pkg)) else: - pkg_list += f" '{spec}'" + pkg_list.append(spec) + + cmd_base = [ + *(exec_cmd.split()), + exec_arg, + "python3", + "-B", + "-m", + "pip", + "install", + "--no-warn-script-location", + ] + + cmd_expr = cmd_base + pkg_list + cmdenv = { + "PYTHONUSERBASE": path_install, + "PIP_CACHE_DIR": path_cache, + } + + print( + "INFO: running:", + " ".join([f"{k}={v}" for k, v in cmdenv.items()]) + " " + " ".join(cmd_expr), + ) - cmd_expr = ( - f"PYTHONUSERBASE={path_install} PIP_CACHE_DIR={path_cache} " - f"{exec_cmd} {exec_arg} python3 -B -m pip install --no-warn-script-location {pkg_list}" + subprocess.run( + cmd_expr, + env=cmdenv, + check=True, ) - print("INFO: running:", cmd_expr) - os.system(cmd_expr) def cmdexec(args) -> None: From 
f7dbc325004ae87977f9600ae1d1dab7b817c5e2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 29 Jan 2025 18:48:51 +0100 Subject: [PATCH 064/101] remove smk functions --- pyproject.toml | 5 ++- workflow/src/legenddataflow/FileKey.py | 52 +++++++++++++++----------- 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ee2f40c..df67b42 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,12 +51,15 @@ dynamic = ["version"] dependencies = [ "dbetto>=1.0.5", - "snakemake>=8.16", "pylegendmeta==1.2.0a2", ] [project.optional-dependencies] +full = [ +"snakemake>=8.16", +] no_container = [ + "legend_dataflow[full]", "pygama", "dspeed", "pylegendmeta==1.2.0a2", diff --git a/workflow/src/legenddataflow/FileKey.py b/workflow/src/legenddataflow/FileKey.py index ca4573c..63a1842 100644 --- a/workflow/src/legenddataflow/FileKey.py +++ b/workflow/src/legenddataflow/FileKey.py @@ -3,11 +3,10 @@ """ import re +import string from collections import namedtuple from pathlib import Path -import snakemake as smk - from .patterns import ( full_channel_pattern_with_extension, get_pattern_tier, @@ -20,6 +19,18 @@ # +def regex_from_filepattern(filepattern): + f = [] + last = 0 + for match in re.compile(r"\{(?P[\w]+)\}").finditer(filepattern): + wildcard = match.group("name") + f.append(f"(?P={wildcard})") + last = match.end() + f.append(re.escape(filepattern[last:])) + f.append("$") + return "".join(f) + + class FileKey(namedtuple("FileKey", ["experiment", "period", "run", "datatype", "timestamp"])): __slots__ = () @@ -51,18 +62,12 @@ def get_filekey_from_filename(cls, filename): @classmethod def get_filekey_from_pattern(cls, filename, pattern=None): - if pattern is None: - try: - key_pattern_rx = re.compile(smk.io.regex_from_filepattern(cls.key_pattern)) - except AttributeError: - key_pattern_rx = re.compile(smk.io.regex(cls.key_pattern)) - else: - if isinstance(pattern, Path): - pattern = pattern.as_posix() - try: - key_pattern_rx = re.compile(smk.io.regex_from_filepattern(pattern)) - except AttributeError: - key_pattern_rx = re.compile(smk.io.regex(pattern)) + if isinstance(pattern, Path): + pattern = pattern.as_posix() + + key_pattern_rx = re.compile( + regex_from_filepattern(cls.key_pattern if pattern is None else pattern) + ) if key_pattern_rx.match(filename) is None: return None @@ -93,11 +98,14 @@ def parse_keypart(cls, keypart): d[key] = "*" return cls(**d) + def expand(self, file_pattern, **kwargs): + wildcard_dict = dict(**self._asdict(), **kwargs) + formatter = string.Formatter() + return [formatter.vformat(file_pattern, (), wildcard_dict)] + def get_path_from_filekey(self, pattern, **kwargs): - if isinstance(pattern, Path): - pattern = pattern.as_posix() if kwargs is None: - return smk.io.expand(pattern, **self._asdict()) + return self.expand(pattern, **kwargs) else: for entry, value in kwargs.items(): if isinstance(value, dict): @@ -105,7 +113,7 @@ def get_path_from_filekey(self, pattern, **kwargs): kwargs[entry] = value[next(iter(set(value).intersection(self._list())))] else: kwargs.pop(entry) - return smk.io.expand(pattern, **self._asdict(), **kwargs) + return self.expand(pattern, **kwargs) # get_path_from_key @classmethod @@ -172,7 +180,7 @@ def get_path_from_filekey(self, pattern, **kwargs): if not isinstance(pattern, str): pattern = pattern(self.tier, self.identifier) if kwargs is None: - return smk.io.expand(pattern, **self._asdict()) + return self.expand(pattern, **kwargs) else: for entry, value in kwargs.items(): if isinstance(value, dict): @@ 
-180,7 +188,7 @@ def get_path_from_filekey(self, pattern, **kwargs): kwargs[entry] = value[next(iter(set(value).intersection(self._list())))] else: kwargs.pop(entry) - return smk.io.expand(pattern, **self._asdict(), **kwargs) + return self.expand(pattern, **kwargs) class ChannelProcKey(FileKey): @@ -211,7 +219,9 @@ def get_channel_files(keypart, par_pattern, chan_list): for chan in chan_list: wildcards_dict = d._asdict() wildcards_dict.pop("channel") - file = smk.io.expand(par_pattern, **wildcards_dict, channel=chan)[0] + formatter = string.Formatter() + wildcards_dict["channel"] = chan + file = formatter.vformat(par_pattern, (), wildcards_dict) filenames.append(file) return filenames From 4e1cfd5c81b303fbdcf7ee5f7ca3eb2ac9c99c30 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 12:23:04 +0100 Subject: [PATCH 065/101] rewrite execenv functions to use uv. remove setups/l200 prefix in config --- .gitignore | 1 + .pre-commit-config.yaml | 2 +- config-lngs.yaml | 140 +++++++++-------- pyproject.toml | 49 +++--- workflow/Snakefile | 43 +++--- workflow/Snakefile-build-raw | 29 ++-- workflow/src/legenddataflow/__init__.py | 10 +- workflow/src/legenddataflow/execenv.py | 197 +++++++++++++++++------- workflow/src/legenddataflow/utils.py | 15 -- 9 files changed, 279 insertions(+), 207 deletions(-) diff --git a/.gitignore b/.gitignore index b4586b6..48a1541 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .snakemake +workflow/src/legenddataflow/_version.py # New additions go at the top! *.c diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1b3a8b9..e369b65 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -95,7 +95,7 @@ repos: rev: "v4.0.0-alpha.8" hooks: - id: prettier - types_or: [yaml, markdown, json] + types_or: [yaml, markdown, json, toml] - repo: https://github.com/snakemake/snakefmt rev: v0.10.2 diff --git a/config-lngs.yaml b/config-lngs.yaml index b14c913..faa456b 100644 --- a/config-lngs.yaml +++ b/config-lngs.yaml @@ -1,82 +1,80 @@ -setups: - l200: - paths: - sandbox_path: /data1/shared/l200-p13/sandbox - tier_daq: $_/../daq/generated/tier/daq - tier_raw_blind: "" +paths: + sandbox_path: /data1/shared/l200-p13/sandbox + tier_daq: $_/../daq/generated/tier/daq + tier_raw_blind: "" - workflow: $_/workflow + workflow: $_/workflow - metadata: $_/inputs - config: $_/inputs/dataprod/config - par_overwrite: $_/inputs/dataprod/overrides - chan_map: $_/inputs/hardware/configuration - detector_status: $_/inputs/datasets - detector_db: $_/inputs/hardware/detectors + metadata: $_/inputs + config: $_/inputs/dataprod/config + par_overwrite: $_/inputs/dataprod/overrides + chan_map: $_/inputs/hardware/configuration + detector_status: $_/inputs/datasets + detector_db: $_/inputs/hardware/detectors - tier: $_/generated/tier - tier_raw: $_/generated/tier/raw - tier_tcm: $_/generated/tier/tcm - tier_dsp: $_/generated/tier/dsp - tier_hit: $_/generated/tier/hit - tier_ann: $_/generated/tier/ann - tier_evt: $_/generated/tier/evt - tier_psp: $_/generated/tier/psp - tier_pht: $_/generated/tier/pht - tier_pan: $_/generated/tier/pan - tier_pet: $_/generated/tier/pet - tier_skm: $_/generated/tier/skm + tier: $_/generated/tier + tier_raw: $_/generated/tier/raw + tier_tcm: $_/generated/tier/tcm + tier_dsp: $_/generated/tier/dsp + tier_hit: $_/generated/tier/hit + tier_ann: $_/generated/tier/ann + tier_evt: $_/generated/tier/evt + tier_psp: $_/generated/tier/psp + tier_pht: $_/generated/tier/pht + tier_pan: $_/generated/tier/pan + tier_pet: 
$_/generated/tier/pet + tier_skm: $_/generated/tier/skm - par: $_/generated/par - par_raw: $_/generated/par/raw - par_tcm: $_/generated/par/tcm - par_dsp: $_/generated/par/dsp - par_hit: $_/generated/par/hit - par_evt: $_/generated/par/evt - par_psp: $_/generated/par/psp - par_pht: $_/generated/par/pht - par_pet: $_/generated/par/pet + par: $_/generated/par + par_raw: $_/generated/par/raw + par_tcm: $_/generated/par/tcm + par_dsp: $_/generated/par/dsp + par_hit: $_/generated/par/hit + par_evt: $_/generated/par/evt + par_psp: $_/generated/par/psp + par_pht: $_/generated/par/pht + par_pet: $_/generated/par/pet - plt: $_/generated/plt - log: $_/generated/log + plt: $_/generated/plt + log: $_/generated/log - tmp_plt: $_/generated/tmp/plt - tmp_log: $_/generated/tmp/log - tmp_filelists: $_/generated/tmp/filelists - tmp_par: $_/generated/tmp/par + tmp_plt: $_/generated/tmp/plt + tmp_log: $_/generated/tmp/log + tmp_filelists: $_/generated/tmp/filelists + tmp_par: $_/generated/tmp/par - src: $_/software/python/src - install: $_/software/python/install - cache: $_/software/python/cache + src: $_/software/python/src + install: $_/software/python/install + cache: $_/software/python/cache - table_format: - raw: ch{ch:07d}/raw - dsp: ch{ch:07d}/dsp - psp: ch{ch:07d}/dsp - hit: ch{ch:07d}/hit - pht: ch{ch:07d}/hit - evt: "{grp}/evt" - pet: "{grp}/evt" - skm: "{grp}/skm" - tcm: hardware_tcm_1 +table_format: + raw: ch{ch:07d}/raw + dsp: ch{ch:07d}/dsp + psp: ch{ch:07d}/dsp + hit: ch{ch:07d}/hit + pht: ch{ch:07d}/hit + evt: "{grp}/evt" + pet: "{grp}/evt" + skm: "{grp}/skm" + tcm: hardware_tcm_1 - execenv: - cmd: apptainer run - arg: /data2/public/prodenv/containers/legendexp_legend-base_latest_20221021210158.sif - env: - PRODENV: $PRODENV - HDF5_USE_FILE_LOCKING: "False" - LGDO_BOUNDSCHECK: "false" - DSPEED_BOUNDSCHECK: "false" - PYGAMA_PARALLEL: "false" - PYGAMA_FASTMATH: "false" - DISABLE_TQDM: "True" +execenv: + cmd: apptainer run + arg: /data2/public/prodenv/containers/legendexp_legend-base_latest_20241110203225.sif + env: + PRODENV: $PRODENV + HDF5_USE_FILE_LOCKING: "False" + LGDO_BOUNDSCHECK: "false" + DSPEED_BOUNDSCHECK: "false" + PYGAMA_PARALLEL: "false" + PYGAMA_FASTMATH: "false" + DISABLE_TQDM: "True" - pkg_versions: - - pygama==2.0.* - - pylegendmeta==1.2.0a2 - - dspeed==1.6.* - - legend-pydataobj>=1.11.4 - - legend-daq2lh5==1.4.* +pkg_versions: + - pygama==2.0.* + - pylegendmeta==1.2.0a2 + - dspeed==1.6.* + - legend-pydataobj>=1.11.4 + - legend-daq2lh5==1.4.* - legend_metadata_version: v0.5.7 +legend_metadata_version: v0.5.7 diff --git a/pyproject.toml b/pyproject.toml index df67b42..944cfe5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,10 @@ exclude = ["generated", "inputs", "software", "workflow"] [build-system] -requires = ["setuptools>=61.0"] +requires = [ + "setuptools>=61.2", + "setuptools_scm[toml]>=7" +] build-backend = "setuptools.build_meta" [tool.setuptools] @@ -49,42 +52,38 @@ readme = "README.md" requires-python = ">=3.11" dynamic = ["version"] +# these are the dependencies strictly required by legend-dataflow dependencies = [ "dbetto>=1.0.5", "pylegendmeta==1.2.0a2", ] [project.optional-dependencies] -full = [ -"snakemake>=8.16", -] -no_container = [ - "legend_dataflow[full]", - "pygama", - "dspeed", - "pylegendmeta==1.2.0a2", - "legend-pydataobj", - "legend-daq2lh5", +# these are needed to run the data production +dataprod = [ + "snakemake>=8.16", + "pygama", + "dspeed", + "pylegendmeta==1.2.0a2", + "legend-pydataobj", + "legend-daq2lh5", ] test = [ - 
"legend_dataflow[no_container]", - "pytest >=6", - "pytest-cov >=3", + "legend_dataflow[dataprod]", + "pytest>=6", + "pytest-cov>=3", ] dev = [ - "legend_dataflow[no_container]", - "pytest >=6", - "pytest-cov >=3", - "pre-commit", + "legend_dataflow[dataprod,test]", + "pre-commit", ] docs = [ - "legend_dataflow[no_container]", - "sphinx>=7.0", - "myst_parser>=0.13", - "sphinx_inline_tabs", - "sphinx_copybutton", - "sphinx_autodoc_typehints", - "furo>=2023.08.17", + "sphinx>=7.0", + "myst_parser>=0.13", + "sphinx_inline_tabs", + "sphinx_copybutton", + "sphinx_autodoc_typehints", + "furo>=2023.08.17", ] [project.scripts] diff --git a/workflow/Snakefile b/workflow/Snakefile index 011cb05..9fa6950 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -25,20 +25,19 @@ from legenddataflow import utils utils.subst_vars_in_snakemake_config(workflow, config) check_in_cycle = True -setup = config["setups"]["l200"] -configs = utils.config_path(setup) -chan_maps = utils.chan_map_path(setup) -meta = utils.metadata_path(setup) -det_status = utils.det_status_path(setup) -swenv = utils.runcmd(setup) +configs = utils.config_path(config) +chan_maps = utils.chan_map_path(config) +meta = utils.metadata_path(config) +det_status = utils.det_status_path(config) +swenv = utils.runcmd(config) basedir = workflow.basedir # wait for new pylegendmeta release # if not Path(meta).exists(): # meta = LegendMetadata() -# meta.checkout(config["setups"]["l200"]["legend_metadata_version"]) +# meta.checkout(config["configs"]["l200"]["legend_metadata_version"]) -part = CalGrouping(setup, Path(det_status) / "cal_groupings.yaml") +part = CalGrouping(config, Path(det_status) / "cal_groupings.yaml") wildcard_constraints: @@ -81,7 +80,7 @@ onstart: shell('{swenv} python3 -B -c "import ' + pkg + '"') # Log parameter catalogs in validity.jsonl files - hit_par_cat_file = Path(utils.pars_path(setup)) / "hit" / "validity.yaml" + hit_par_cat_file = Path(utils.pars_path(config)) / "hit" / "validity.yaml" if hit_par_cat_file.is_file(): hit_par_cat_file.unlink() try: @@ -90,7 +89,7 @@ onstart: except NameError: print("No hit parameter catalog found") - pht_par_cat_file = Path(utils.pars_path(setup)) / "pht" / "validity.yaml" + pht_par_cat_file = Path(utils.pars_path(config)) / "pht" / "validity.yaml" if pht_par_cat_file.is_file(): pht_par_cat_file.unlink() try: @@ -99,7 +98,7 @@ onstart: except NameError: print("No pht parameter catalog found") - dsp_par_cat_file = Path(utils.pars_path(setup)) / "dsp" / "validity.yaml" + dsp_par_cat_file = Path(utils.pars_path(config)) / "dsp" / "validity.yaml" if dsp_par_cat_file.is_file(): dsp_par_cat_file.unlink() try: @@ -108,7 +107,7 @@ onstart: except NameError: print("No dsp parameter catalog found") - psp_par_cat_file = Path(utils.pars_path(setup)) / "psp" / "validity.yaml" + psp_par_cat_file = Path(utils.pars_path(config)) / "psp" / "validity.yaml" if psp_par_cat_file.is_file(): psp_par_cat_file.unlink() try: @@ -121,7 +120,7 @@ onstart: onsuccess: from snakemake.report import auto_report - rep_dir = f"{log_path(setup)}/report-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}" + rep_dir = f"{log_path(config)}/report-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}" Path(rep_dir).mkdir(parents=True, exist_ok=True) # auto_report(workflow.persistence.dag, f"{rep_dir}/report.html") @@ -140,24 +139,24 @@ onsuccess: os.remove(file) # remove filelists - files = glob.glob(os.path.join(utils.filelist_path(setup), "*")) + files = glob.glob(os.path.join(utils.filelist_path(config), "*")) 
for file in files: if os.path.isfile(file): os.remove(file) - if os.path.exists(utils.filelist_path(setup)): - os.rmdir(utils.filelist_path(setup)) + if os.path.exists(utils.filelist_path(config)): + os.rmdir(utils.filelist_path(config)) # remove logs - files = glob.glob(os.path.join(utils.tmp_log_path(setup), "*", "*.log")) + files = glob.glob(os.path.join(utils.tmp_log_path(config), "*", "*.log")) for file in files: if os.path.isfile(file): os.remove(file) - dirs = glob.glob(os.path.join(utils.tmp_log_path(setup), "*")) + dirs = glob.glob(os.path.join(utils.tmp_log_path(config), "*")) for d in dirs: if os.path.isdir(d): os.rmdir(d) - if os.path.exists(utils.tmp_log_path(setup)): - os.rmdir(utils.tmp_log_path(setup)) + if os.path.exists(utils.tmp_log_path(config)): + os.rmdir(utils.tmp_log_path(config)) rule gen_filelist: @@ -171,12 +170,12 @@ rule gen_filelist: input: lambda wildcards: get_filelist( wildcards, - setup, + config, get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - temp(Path(utils.filelist_path(setup)) / "{label}-{tier}.filelist"), + temp(Path(utils.filelist_path(config)) / "{label}-{tier}.filelist"), script: "scripts/write_filelist.py" diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw index 763cb8a..abb1b34 100644 --- a/workflow/Snakefile-build-raw +++ b/workflow/Snakefile-build-raw @@ -9,24 +9,19 @@ to the blinded raw data. It handles: import os, sys from pathlib import Path from legenddataflow import patterns as patt -from legenddataflow import utils, ParsKeyResolve - -check_in_cycle = True +from legenddataflow import utils, execenv, ParsKeyResolve utils.subst_vars_in_snakemake_config(workflow, config) -setup = config["setups"]["l200"] -configs = utils.config_path(setup) -chan_maps = utils.chan_map_path(setup) -swenv = utils.runcmd(setup) -meta = utils.metadata_path(setup) -det_status = utils.det_status_path(setup) -basedir = workflow.basedir +check_in_cycle = True +swenv = execenv.execenv_prefix(config) +# meta = utils.metadata_path(config) +det_status = utils.det_status_path(config) # wait for new pylegendmeta release # if not Path(meta).exists(): # meta = LegendMetadata() -# meta.checkout(config["setups"]["l200"]["legend_metadata_version"]) +# meta.checkout(config["configs"]["l200"]["legend_metadata_version"]) wildcard_constraints: @@ -56,7 +51,7 @@ onstart: shell('{swenv} python3 -B -c "import daq2lh5 "') - raw_par_cat_file = Path(utils.pars_path(setup)) / "raw" / "validity.yaml" + raw_par_cat_file = Path(utils.pars_path(config)) / "raw" / "validity.yaml" if raw_par_cat_file.is_file(): raw_par_cat_file.unlink() try: @@ -69,20 +64,20 @@ onstart: onsuccess: print("Workflow finished, no error") shell("rm *.gen || true") - shell(f"rm {utils.filelist_path(setup)}/* || true") + shell(f"rm {utils.filelist_path(config)}/* || true") rule gen_filelist: input: lambda wildcards: get_filelist( wildcards, - setup, + config, get_search_pattern(wildcards.tier), ignore_keys_file=Path(det_status) / "ignored_daq_cycles.yaml", analysis_runs_file=Path(det_status) / "runlists.yaml", ), output: - temp(Path(utils.filelist_path(setup)) / "{label}-{tier}.filelist"), + temp(Path(utils.filelist_path(config)) / "{label}-{tier}.filelist"), script: "scripts/write_filelist.py" @@ -93,9 +88,9 @@ rule sort_data: to the sorted dirs under generated """ input: - patt.get_pattern_tier_daq_unsorted(setup, extension="fcio"), + patt.get_pattern_tier_daq_unsorted(config, 
extension="fcio"), output: - patt.get_pattern_tier_daq(setup, extension="fcio"), + patt.get_pattern_tier_daq(config, extension="fcio"), shell: "mv {input} {output}" diff --git a/workflow/src/legenddataflow/__init__.py b/workflow/src/legenddataflow/__init__.py index f812d11..223914e 100644 --- a/workflow/src/legenddataflow/__init__.py +++ b/workflow/src/legenddataflow/__init__.py @@ -1,9 +1,13 @@ from .cal_grouping import CalGrouping from .create_pars_keylist import ParsKeyResolve +from .execenv import ( + execenv_prefix, + execenv_python, + execenv_smk_py_script, +) from .FileKey import ChannelProcKey, FileKey, ProcessingFileKey from .pars_loading import ParsCatalog from .utils import ( - runcmd, subst_vars, subst_vars_impl, subst_vars_in_snakemake_config, @@ -17,7 +21,9 @@ "ParsCatalog", "ParsKeyResolve", "ProcessingFileKey", - "runcmd", + "execenv_prefix", + "execenv_python", + "execenv_smk_py_script", "subst_vars", "subst_vars", "subst_vars_impl", diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index fa91400..b119dd1 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -1,16 +1,70 @@ -# ruff: noqa: T201 from __future__ import annotations import argparse +import logging import os +import shlex import shutil import string import subprocess from pathlib import Path import dbetto +from dbetto import AttrsDict from packaging.requirements import Requirement +from . import utils + +log = logging.getLogger(__name__) + + +def execenv_prefix(config, aslist=False): + """Returns the software environment command prefix. + + For example: `apptainer run image.sif` + """ + config = AttrsDict(config) + + cmdline = shlex.split(config.execenv.cmd) + if "env" in config.execenv: + cmdline += [f"--env={var}={val}" for var, val in config.execenv.env.items()] + + cmdline += shlex.split(config.execenv.arg) + + if aslist: + return cmdline + return " ".join(cmdline) + + +def execenv_python(config, aslist=False): + """Returns the Python interpreter command. + + For example: `apptainer run image.sif python` + """ + config = AttrsDict(config) + + cmdline = execenv_prefix(config, aslist=True) + cmdline.append(f"{config.paths.install}/bin/python") + + if aslist: + return cmdline + return " ".join(cmdline) + + +def execenv_smk_py_script(workflow, config, scriptname, aslist=False): + """Returns the command used to run a Python script for a Snakemake rule. + + For example: `apptainer run image.sif python path/to/script.py` + """ + config = AttrsDict(config) + + cmdline = execenv_python(config, aslist=True) + cmdline.append(f"{workflow.basedir}/scripts/{scriptname}") + + if aslist: + return cmdline + return " ".join(cmdline) + def dataprod() -> None: """dataprod's command-line interface for installing and loading the software in the data production environment. @@ -57,38 +111,76 @@ def install(args) -> None: format: ```yaml - setups: - l200: - pkg_versions: - - python_package_spec + pkg_versions: + - python_package_spec + - ... 
``` """ - print(args.config_file) - if not Path(args.config_file).is_file(): - msg = "config file is not a regular file" - raise RuntimeError(msg) + config_dict = AttrsDict(dbetto.utils.load_dict(args.config_file)) + config_loc = Path(args.config_file).resolve().parent + path_install = config_dict.paths.install - config_file_dir = Path(args.config_file).resolve().parent - config_dic = dbetto.AttrsDict(dbetto.utils.load_dict(args.config_file)) + if args.r and Path(path_install).exists(): + shutil.rmtree(path_install) - exec_cmd = config_dic.setups.l200.execenv.cmd - exec_arg = config_dic.setups.l200.execenv.arg - path_src = config_dic.setups.l200.paths.src - path_install = config_dic.setups.l200.paths.install - path_cache = config_dic.setups.l200.paths.cache + utils.subst_vars( + config_dict, + var_values={"_": config_loc}, + use_env=True, + ignore_missing=False, + ) - exec_cmd = string.Template(exec_cmd).substitute({"_": config_file_dir}) - exec_arg = string.Template(exec_arg).substitute({"_": config_file_dir}) - path_src = Path(string.Template(path_src).substitute({"_": config_file_dir})) - path_install = Path(string.Template(path_install).substitute({"_": config_file_dir})) - path_cache = Path(string.Template(path_cache).substitute({"_": config_file_dir})) + cmd_env = {} - if args.r: - shutil.rmtree(path_install) - shutil.rmtree(path_cache) + def _runcmd(cmd_env, cmd_expr): + msg = ( + "running:" + + " ".join([f"{k}={v}" for k, v in cmd_env.items()]) + + " " + + " ".join(cmd_expr), + ) + log.debug(msg) + + subprocess.run(cmd_expr, env=cmd_env, check=True) + # configure venv + cmd_expr = [*execenv_prefix(config_dict, aslist=True), "python3", "-m", "venv", path_install] + + log.info(f"configuring virtual environment in {path_install}") + _runcmd(cmd_env, cmd_expr) + + cmd_expr = [ + *execenv_python(config_dict, aslist=True), + "-m", + "pip", + "--no-cache-dir", + "install", + "--upgrade", + "pip", + ] + + log.info("upgrading pip") + _runcmd(cmd_env, cmd_expr) + + # install uv + cmd_expr = [ + *execenv_python(config_dict, aslist=True), + "-m", + "pip", + "--no-cache-dir", + "install", + "--no-warn-script-location", + "uv", + ] + + log.info("installing uv") + _runcmd(cmd_env, cmd_expr) + + # now packages + + path_src = Path(config_dict.paths.src) pkg_list = [] - for spec in config_dic.setups.l200.pkg_versions: + for spec in config_dict.pkg_versions: pkg = Requirement(spec).name if (path_src / pkg).exists(): pkg_list.append(str(path_src / pkg)) @@ -96,50 +188,47 @@ def install(args) -> None: pkg_list.append(spec) cmd_base = [ - *(exec_cmd.split()), - exec_arg, - "python3", - "-B", + *execenv_python(config_dict, aslist=True), "-m", + "uv", "pip", + "--no-cache", "install", - "--no-warn-script-location", ] cmd_expr = cmd_base + pkg_list - cmdenv = { - "PYTHONUSERBASE": path_install, - "PIP_CACHE_DIR": path_cache, - } - - print( - "INFO: running:", - " ".join([f"{k}={v}" for k, v in cmdenv.items()]) + " " + " ".join(cmd_expr), - ) - subprocess.run( - cmd_expr, - env=cmdenv, - check=True, - ) + log.info("installing packages") + _runcmd(cmd_env, cmd_expr) + + # and finally legenddataflow + + cmd_expr = [ + *execenv_python(config_dict, aslist=True), + "-m", + "uv", + "pip", + "--no-cache", + "install", + # "--editable", # TODO do we really want this? + str(config_loc), + ] + + log.info("installing packages") + _runcmd(cmd_env, cmd_expr) def cmdexec(args) -> None: """ This function loads the data production environment and executes a given command. 
""" - - if not Path(args.config_file).is_file(): - msg = "config file is not a regular file" - raise RuntimeError(msg) - config_file_dir = Path(args.config_file).resolve().parent - config_dic = dbetto.AttrsDict(dbetto.utils.load_dict(args.config_file)) + config_dict = AttrsDict(dbetto.utils.load_dict(args.config_file)) - exec_cmd = config_dic.setups.l200.execenv.cmd - exec_arg = config_dic.setups.l200.execenv.arg - env_vars = config_dic.setups.l200.execenv.env - path_install = config_dic.setups.l200.paths.install + exec_cmd = config_dict.execenv.cmd + exec_arg = config_dict.execenv.arg + env_vars = config_dict.execenv.env + path_install = config_dict.paths.install exec_cmd = string.Template(exec_cmd).substitute({"_": config_file_dir}) exec_arg = string.Template(exec_arg).substitute({"_": config_file_dir}) diff --git a/workflow/src/legenddataflow/utils.py b/workflow/src/legenddataflow/utils.py index 0b45a81..6bcbb01 100644 --- a/workflow/src/legenddataflow/utils.py +++ b/workflow/src/legenddataflow/utils.py @@ -8,7 +8,6 @@ import copy import os import re -import shlex import string from datetime import datetime from pathlib import Path @@ -108,20 +107,6 @@ def filelist_path(setup): return setup["paths"]["tmp_filelists"] -def runcmd(setup, aslist=False): - cmdline = shlex.split(setup["execenv"]["cmd"]) - cmdline += ["--env=" + "PYTHONUSERBASE=" + f"{setup['paths']['install']}"] - if "env" in setup["execenv"]: - cmdline += [f"--env={var}={val}" for var, val in setup["execenv"]["env"].items()] - - cmdline += shlex.split(setup["execenv"]["arg"]) - - if aslist: - return cmdline - - return " ".join(cmdline) - - def subst_vars_impl(x, var_values, ignore_missing=False): if isinstance(x, str): if "$" in x: From c27b1b1c51d42922730a82f3c4808b5efce44df6 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 13:49:32 +0100 Subject: [PATCH 066/101] install uv only if not already available --- workflow/src/legenddataflow/execenv.py | 84 ++++++++++++++++---------- 1 file changed, 53 insertions(+), 31 deletions(-) diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index b119dd1..c49d04f 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -118,10 +118,6 @@ def install(args) -> None: """ config_dict = AttrsDict(dbetto.utils.load_dict(args.config_file)) config_loc = Path(args.config_file).resolve().parent - path_install = config_dict.paths.install - - if args.r and Path(path_install).exists(): - shutil.rmtree(path_install) utils.subst_vars( config_dict, @@ -130,6 +126,11 @@ def install(args) -> None: ignore_missing=False, ) + path_install = config_dict.paths.install + + if args.r and Path(path_install).exists(): + shutil.rmtree(path_install) + cmd_env = {} def _runcmd(cmd_env, cmd_expr): @@ -143,38 +144,59 @@ def _runcmd(cmd_env, cmd_expr): subprocess.run(cmd_expr, env=cmd_env, check=True) + has_uv = False + try: + subprocess.run( + [*execenv_prefix(config_dict, aslist=True), "uv", "--version"], + capture_output=True, + check=True, + ) + has_uv = True + except (subprocess.CalledProcessError, FileNotFoundError): + pass + # configure venv - cmd_expr = [*execenv_prefix(config_dict, aslist=True), "python3", "-m", "venv", path_install] + if has_uv: + cmd_expr = [*execenv_prefix(config_dict, aslist=True), "uv", "venv", path_install] + else: + cmd_expr = [ + *execenv_prefix(config_dict, aslist=True), + "python3", + "-m", + "venv", + path_install, + ] log.info(f"configuring virtual environment in {path_install}") 
_runcmd(cmd_env, cmd_expr) - cmd_expr = [ - *execenv_python(config_dict, aslist=True), - "-m", - "pip", - "--no-cache-dir", - "install", - "--upgrade", - "pip", - ] - - log.info("upgrading pip") - _runcmd(cmd_env, cmd_expr) - - # install uv - cmd_expr = [ - *execenv_python(config_dict, aslist=True), - "-m", - "pip", - "--no-cache-dir", - "install", - "--no-warn-script-location", - "uv", - ] - - log.info("installing uv") - _runcmd(cmd_env, cmd_expr) + if not has_uv: + cmd_expr = [ + *execenv_python(config_dict, aslist=True), + "-m", + "pip", + "--no-cache-dir", + "install", + "--upgrade", + "pip", + ] + + log.info("upgrading pip") + _runcmd(cmd_env, cmd_expr) + + # install uv + cmd_expr = [ + *execenv_python(config_dict, aslist=True), + "-m", + "pip", + "--no-cache-dir", + "install", + "--no-warn-script-location", + "uv", + ] + + log.info("installing uv") + _runcmd(cmd_env, cmd_expr) # now packages From 6ec902d3023a2f5f1f7fbf56623ecfe4b966f244 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 14:07:39 +0100 Subject: [PATCH 067/101] improve config-lngs.yaml --- config-lngs.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/config-lngs.yaml b/config-lngs.yaml index faa456b..356aa4e 100644 --- a/config-lngs.yaml +++ b/config-lngs.yaml @@ -44,8 +44,7 @@ paths: tmp_par: $_/generated/tmp/par src: $_/software/python/src - install: $_/software/python/install - cache: $_/software/python/cache + install: $_/.snakemake/legend-dataflow/venv table_format: raw: ch{ch:07d}/raw @@ -59,7 +58,7 @@ table_format: tcm: hardware_tcm_1 execenv: - cmd: apptainer run + cmd: apptainer exec arg: /data2/public/prodenv/containers/legendexp_legend-base_latest_20241110203225.sif env: PRODENV: $PRODENV From cf093d8e743337eac71caf4a63a8ef8eb4a3fb79 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 14:43:29 +0100 Subject: [PATCH 068/101] also fix dataprod exec --- pyproject.toml | 1 + workflow/src/legenddataflow/execenv.py | 150 ++++++++++++------------- 2 files changed, 74 insertions(+), 77 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 944cfe5..06120d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ dynamic = ["version"] # these are the dependencies strictly required by legend-dataflow dependencies = [ + "colorlog", "dbetto>=1.0.5", "pylegendmeta==1.2.0a2", ] diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index c49d04f..2bf433c 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -5,10 +5,10 @@ import os import shlex import shutil -import string import subprocess from pathlib import Path +import colorlog import dbetto from dbetto import AttrsDict from packaging.requirements import Requirement @@ -27,13 +27,19 @@ def execenv_prefix(config, aslist=False): cmdline = shlex.split(config.execenv.cmd) if "env" in config.execenv: + # FIXME: this is not portable, only works with Apptainer and Docker cmdline += [f"--env={var}={val}" for var, val in config.execenv.env.items()] + cmdenv = {} + xdg_runtime_dir = os.getenv("XDG_RUNTIME_DIR") + if xdg_runtime_dir: + cmdenv["APPTAINER_BINDPATH"] = xdg_runtime_dir + cmdline += shlex.split(config.execenv.arg) if aslist: - return cmdline - return " ".join(cmdline) + return cmdline, cmdenv + return " ".join(cmdline), cmdenv def execenv_python(config, aslist=False): @@ -43,12 +49,12 @@ def execenv_python(config, aslist=False): """ config = AttrsDict(config) - cmdline = execenv_prefix(config, aslist=True) 
+ cmdline, cmdenv = execenv_prefix(config, aslist=True) cmdline.append(f"{config.paths.install}/bin/python") if aslist: - return cmdline - return " ".join(cmdline) + return cmdline, cmdenv + return " ".join(cmdline), cmdenv def execenv_smk_py_script(workflow, config, scriptname, aslist=False): @@ -58,12 +64,12 @@ def execenv_smk_py_script(workflow, config, scriptname, aslist=False): """ config = AttrsDict(config) - cmdline = execenv_python(config, aslist=True) + cmdline, cmdenv = execenv_python(config, aslist=True) cmdline.append(f"{workflow.basedir}/scripts/{scriptname}") if aslist: - return cmdline - return " ".join(cmdline) + return cmdline, cmdenv + return " ".join(cmdline), cmdenv def dataprod() -> None: @@ -79,15 +85,18 @@ def dataprod() -> None: prog="dataprod", description="dataprod's command-line interface" ) + parser.add_argument("-v", "--verbose", help="increase verbosity", action="store_true") + subparsers = parser.add_subparsers() parser_install = subparsers.add_parser( "install", help="install user software in data production environment" ) + parser_install.add_argument("config_file", help="production cycle configuration file") parser_install.add_argument( - "config_file", help="production cycle configuration file", type=str - ) - parser_install.add_argument( - "-r", help="remove software directory before installing software", action="store_true" + "-r", + "--remove", + help="remove software directory before installing software", + action="store_true", ) parser_install.set_defaults(func=install) @@ -101,6 +110,17 @@ def dataprod() -> None: parser_exec.set_defaults(func=cmdexec) args = parser.parse_args() + + if args.verbose: + handler = colorlog.StreamHandler() + handler.setFormatter( + colorlog.ColoredFormatter("%(log_color)s%(name)s [%(levelname)s] %(message)s") + ) + + logger = logging.getLogger("legenddataflow") + logger.setLevel(logging.DEBUG) + logger.addHandler(handler) + args.func(args) @@ -128,28 +148,28 @@ def install(args) -> None: path_install = config_dict.paths.install - if args.r and Path(path_install).exists(): + if args.remove and Path(path_install).exists(): shutil.rmtree(path_install) - cmd_env = {} - - def _runcmd(cmd_env, cmd_expr): + def _runcmd(cmd_expr, cmd_env, **kwargs): msg = ( - "running:" + "running: " + " ".join([f"{k}={v}" for k, v in cmd_env.items()]) + " " + " ".join(cmd_expr), ) log.debug(msg) - subprocess.run(cmd_expr, env=cmd_env, check=True) + subprocess.run(cmd_expr, env=cmd_env, check=True, **kwargs) + + cmd_prefix, cmd_env = execenv_prefix(config_dict, aslist=True) has_uv = False try: - subprocess.run( - [*execenv_prefix(config_dict, aslist=True), "uv", "--version"], + _runcmd( + [*cmd_prefix, "uv", "--version"], + cmd_env, capture_output=True, - check=True, ) has_uv = True except (subprocess.CalledProcessError, FileNotFoundError): @@ -157,22 +177,18 @@ def _runcmd(cmd_env, cmd_expr): # configure venv if has_uv: - cmd_expr = [*execenv_prefix(config_dict, aslist=True), "uv", "venv", path_install] + cmd_expr = [*cmd_prefix, "uv", "venv", path_install] else: - cmd_expr = [ - *execenv_prefix(config_dict, aslist=True), - "python3", - "-m", - "venv", - path_install, - ] + cmd_expr = [*cmd_prefix, "python3", "-m", "venv", path_install] log.info(f"configuring virtual environment in {path_install}") - _runcmd(cmd_env, cmd_expr) + _runcmd(cmd_expr, cmd_env) + + python, cmd_env = execenv_python(config_dict, aslist=True) if not has_uv: cmd_expr = [ - *execenv_python(config_dict, aslist=True), + *python, "-m", "pip", "--no-cache-dir", @@ -182,11 
+198,11 @@ def _runcmd(cmd_env, cmd_expr): ] log.info("upgrading pip") - _runcmd(cmd_env, cmd_expr) + _runcmd(cmd_expr, cmd_env) # install uv cmd_expr = [ - *execenv_python(config_dict, aslist=True), + *python, "-m", "pip", "--no-cache-dir", @@ -196,7 +212,7 @@ def _runcmd(cmd_env, cmd_expr): ] log.info("installing uv") - _runcmd(cmd_env, cmd_expr) + _runcmd(cmd_expr, cmd_env) # now packages @@ -209,24 +225,17 @@ def _runcmd(cmd_env, cmd_expr): else: pkg_list.append(spec) - cmd_base = [ - *execenv_python(config_dict, aslist=True), - "-m", - "uv", - "pip", - "--no-cache", - "install", - ] + cmd_base = [*python, "-m", "uv", "pip", "--no-cache", "install"] cmd_expr = cmd_base + pkg_list log.info("installing packages") - _runcmd(cmd_env, cmd_expr) + _runcmd(cmd_expr, cmd_env) # and finally legenddataflow cmd_expr = [ - *execenv_python(config_dict, aslist=True), + *python, "-m", "uv", "pip", @@ -237,45 +246,32 @@ def _runcmd(cmd_env, cmd_expr): ] log.info("installing packages") - _runcmd(cmd_env, cmd_expr) + _runcmd(cmd_expr, cmd_env) def cmdexec(args) -> None: """ This function loads the data production environment and executes a given command. """ - config_file_dir = Path(args.config_file).resolve().parent config_dict = AttrsDict(dbetto.utils.load_dict(args.config_file)) + config_loc = Path(args.config_file).resolve().parent - exec_cmd = config_dict.execenv.cmd - exec_arg = config_dict.execenv.arg - env_vars = config_dict.execenv.env - path_install = config_dict.paths.install + utils.subst_vars( + config_dict, + var_values={"_": config_loc}, + use_env=True, + ignore_missing=False, + ) - exec_cmd = string.Template(exec_cmd).substitute({"_": config_file_dir}) - exec_arg = string.Template(exec_arg).substitute({"_": config_file_dir}) - path_install = string.Template(path_install).substitute({"_": config_file_dir}) + cmd_prefix, cmd_env = execenv_prefix(config_dict, aslist=True) + cmd_expr = [*cmd_prefix, *args.command] - xdg_runtime_dir = os.getenv("XDG_RUNTIME_DIR") - if xdg_runtime_dir: - subprocess.run( - [*(exec_cmd.split()), exec_arg, *args.command], - env=dict( - PYTHONUSERBASE=path_install, - APPTAINERENV_APPEND_PATH=f":{path_install}/bin", - APPTAINER_BINDPATH=xdg_runtime_dir, - **env_vars, - ), - check=True, - ) - else: - subprocess.run( - [*(exec_cmd.split()), exec_arg, *args.command], - env=dict( - PYTHONUSERBASE=path_install, - APPTAINERENV_APPEND_PATH=f":{path_install}/bin", - APPTAINER_BINDPATH=xdg_runtime_dir, - **env_vars, - ), - check=True, - ) + msg = ( + "running: " + + " ".join([f"{k}={v}" for k, v in cmd_env.items()]) + + " " + + " ".join(cmd_expr), + ) + log.debug(msg) + + subprocess.run(cmd_expr, env=cmd_env, check=True) From a3d34d6ac9de58bbf6dc3b9d5ecf30d11e46e821 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 14:59:13 +0100 Subject: [PATCH 069/101] config.execenv can be left unspecified --- workflow/src/legenddataflow/execenv.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index 2bf433c..ad54d11 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -25,17 +25,21 @@ def execenv_prefix(config, aslist=False): """ config = AttrsDict(config) - cmdline = shlex.split(config.execenv.cmd) - if "env" in config.execenv: - # FIXME: this is not portable, only works with Apptainer and Docker - cmdline += [f"--env={var}={val}" for var, val in config.execenv.env.items()] - - cmdenv = {} - 
xdg_runtime_dir = os.getenv("XDG_RUNTIME_DIR") - if xdg_runtime_dir: - cmdenv["APPTAINER_BINDPATH"] = xdg_runtime_dir - - cmdline += shlex.split(config.execenv.arg) + if "execenv" in config and "cmd" in config.execenv and "arg" in config.execenv: + cmdline = shlex.split(config.execenv.cmd) + if "env" in config.execenv: + # FIXME: this is not portable, only works with Apptainer and Docker + cmdline += [f"--env={var}={val}" for var, val in config.execenv.env.items()] + + cmdenv = {} + xdg_runtime_dir = os.getenv("XDG_RUNTIME_DIR") + if xdg_runtime_dir: + cmdenv["APPTAINER_BINDPATH"] = xdg_runtime_dir + + cmdline += shlex.split(config.execenv.arg) + else: + cmdenv = {} + cmdline = [] if aslist: return cmdline, cmdenv From a0d68420413c9bf77a969670188455e647f1f348 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 15:05:10 +0100 Subject: [PATCH 070/101] move scripts in legenddataflow package and make imports relative --- pyproject.toml | 2 +- workflow/Snakefile-build-raw | 8 +++----- workflow/src/legenddataflow/__init__.py | 4 ---- workflow/{ => src/legenddataflow}/scripts/__init__.py | 0 .../legenddataflow}/scripts/blinding_calibration.py | 0 workflow/{ => src/legenddataflow}/scripts/build_dsp.py | 3 ++- workflow/{ => src/legenddataflow}/scripts/build_evt.py | 3 ++- workflow/{ => src/legenddataflow}/scripts/build_fdb.py | 0 workflow/{ => src/legenddataflow}/scripts/build_hit.py | 3 ++- .../{ => src/legenddataflow}/scripts/build_raw_blind.py | 3 ++- .../{ => src/legenddataflow}/scripts/build_raw_fcio.py | 3 ++- .../{ => src/legenddataflow}/scripts/build_raw_orca.py | 3 ++- workflow/{ => src/legenddataflow}/scripts/build_skm.py | 3 ++- workflow/{ => src/legenddataflow}/scripts/build_tcm.py | 3 ++- .../{ => src/legenddataflow}/scripts/check_blinding.py | 3 ++- workflow/{ => src/legenddataflow}/scripts/complete_run.py | 6 +++--- .../legenddataflow}/scripts/create_chankeylist.py | 0 .../{ => src/legenddataflow}/scripts/merge_channels.py | 3 ++- workflow/{ => src/legenddataflow}/scripts/par_psp_geds.py | 3 ++- .../legenddataflow}/scripts/pars_dsp_build_svm_geds.py | 3 ++- .../legenddataflow}/scripts/pars_dsp_dplms_geds.py | 3 ++- .../legenddataflow}/scripts/pars_dsp_eopt_geds.py | 3 ++- .../legenddataflow}/scripts/pars_dsp_evtsel_geds.py | 3 ++- .../legenddataflow}/scripts/pars_dsp_nopt_geds.py | 3 ++- .../{ => src/legenddataflow}/scripts/pars_dsp_svm_geds.py | 0 .../{ => src/legenddataflow}/scripts/pars_dsp_tau_geds.py | 3 ++- workflow/{ => src/legenddataflow}/scripts/pars_hit_aoe.py | 5 +++-- .../{ => src/legenddataflow}/scripts/pars_hit_ecal.py | 5 +++-- workflow/{ => src/legenddataflow}/scripts/pars_hit_lq.py | 5 +++-- workflow/{ => src/legenddataflow}/scripts/pars_hit_qc.py | 5 +++-- .../{ => src/legenddataflow}/scripts/pars_pht_aoecal.py | 5 +++-- .../{ => src/legenddataflow}/scripts/pars_pht_fast.py | 5 +++-- .../{ => src/legenddataflow}/scripts/pars_pht_lqcal.py | 5 +++-- .../{ => src/legenddataflow}/scripts/pars_pht_partcal.py | 5 +++-- workflow/{ => src/legenddataflow}/scripts/pars_pht_qc.py | 5 +++-- .../{ => src/legenddataflow}/scripts/pars_pht_qc_phy.py | 5 +++-- .../{ => src/legenddataflow}/scripts/pars_tcm_pulser.py | 3 ++- .../{ => src/legenddataflow}/scripts/write_filelist.py | 0 38 files changed, 73 insertions(+), 51 deletions(-) rename workflow/{ => src/legenddataflow}/scripts/__init__.py (100%) rename workflow/{ => src/legenddataflow}/scripts/blinding_calibration.py (100%) rename workflow/{ => src/legenddataflow}/scripts/build_dsp.py (99%) rename workflow/{ => 
src/legenddataflow}/scripts/build_evt.py (99%) rename workflow/{ => src/legenddataflow}/scripts/build_fdb.py (100%) rename workflow/{ => src/legenddataflow}/scripts/build_hit.py (98%) rename workflow/{ => src/legenddataflow}/scripts/build_raw_blind.py (99%) rename workflow/{ => src/legenddataflow}/scripts/build_raw_fcio.py (98%) rename workflow/{ => src/legenddataflow}/scripts/build_raw_orca.py (98%) rename workflow/{ => src/legenddataflow}/scripts/build_skm.py (98%) rename workflow/{ => src/legenddataflow}/scripts/build_tcm.py (97%) rename workflow/{ => src/legenddataflow}/scripts/check_blinding.py (98%) rename workflow/{ => src/legenddataflow}/scripts/complete_run.py (98%) rename workflow/{ => src/legenddataflow}/scripts/create_chankeylist.py (100%) rename workflow/{ => src/legenddataflow}/scripts/merge_channels.py (99%) rename workflow/{ => src/legenddataflow}/scripts/par_psp_geds.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_dsp_build_svm_geds.py (97%) rename workflow/{ => src/legenddataflow}/scripts/pars_dsp_dplms_geds.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_dsp_eopt_geds.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_dsp_evtsel_geds.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_dsp_nopt_geds.py (98%) rename workflow/{ => src/legenddataflow}/scripts/pars_dsp_svm_geds.py (100%) rename workflow/{ => src/legenddataflow}/scripts/pars_dsp_tau_geds.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_hit_aoe.py (98%) rename workflow/{ => src/legenddataflow}/scripts/pars_hit_ecal.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_hit_lq.py (95%) rename workflow/{ => src/legenddataflow}/scripts/pars_hit_qc.py (98%) rename workflow/{ => src/legenddataflow}/scripts/pars_pht_aoecal.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_pht_fast.py (98%) rename workflow/{ => src/legenddataflow}/scripts/pars_pht_lqcal.py (96%) rename workflow/{ => src/legenddataflow}/scripts/pars_pht_partcal.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_pht_qc.py (99%) rename workflow/{ => src/legenddataflow}/scripts/pars_pht_qc_phy.py (98%) rename workflow/{ => src/legenddataflow}/scripts/pars_tcm_pulser.py (97%) rename workflow/{ => src/legenddataflow}/scripts/write_filelist.py (100%) diff --git a/pyproject.toml b/pyproject.toml index 06120d2..27e62e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,4 +88,4 @@ docs = [ ] [project.scripts] -dataprod = "legenddataflow.execenv:dataprod" +dataprod = "legenddataflow.library.execenv:dataprod" diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw index abb1b34..fafd20c 100644 --- a/workflow/Snakefile-build-raw +++ b/workflow/Snakefile-build-raw @@ -15,13 +15,11 @@ utils.subst_vars_in_snakemake_config(workflow, config) check_in_cycle = True swenv = execenv.execenv_prefix(config) -# meta = utils.metadata_path(config) +meta_path = utils.metadata_path(config) det_status = utils.det_status_path(config) -# wait for new pylegendmeta release -# if not Path(meta).exists(): -# meta = LegendMetadata() -# meta.checkout(config["configs"]["l200"]["legend_metadata_version"]) +if not Path(meta_path).exists(): + LegendMetadata(meta_path).checkout(config["legend_metadata_version"]) wildcard_constraints: diff --git a/workflow/src/legenddataflow/__init__.py b/workflow/src/legenddataflow/__init__.py index 223914e..ca8ddbb 100644 --- a/workflow/src/legenddataflow/__init__.py +++ b/workflow/src/legenddataflow/__init__.py @@ -9,7 +9,6 @@ from 
.pars_loading import ParsCatalog from .utils import ( subst_vars, - subst_vars_impl, subst_vars_in_snakemake_config, unix_time, ) @@ -25,9 +24,6 @@ "execenv_python", "execenv_smk_py_script", "subst_vars", - "subst_vars", - "subst_vars_impl", "subst_vars_in_snakemake_config", "unix_time", - "unix_time", ] diff --git a/workflow/scripts/__init__.py b/workflow/src/legenddataflow/scripts/__init__.py similarity index 100% rename from workflow/scripts/__init__.py rename to workflow/src/legenddataflow/scripts/__init__.py diff --git a/workflow/scripts/blinding_calibration.py b/workflow/src/legenddataflow/scripts/blinding_calibration.py similarity index 100% rename from workflow/scripts/blinding_calibration.py rename to workflow/src/legenddataflow/scripts/blinding_calibration.py diff --git a/workflow/scripts/build_dsp.py b/workflow/src/legenddataflow/scripts/build_dsp.py similarity index 99% rename from workflow/scripts/build_dsp.py rename to workflow/src/legenddataflow/scripts/build_dsp.py index f6e44df..137eb1f 100644 --- a/workflow/scripts/build_dsp.py +++ b/workflow/src/legenddataflow/scripts/build_dsp.py @@ -8,10 +8,11 @@ from dbetto import TextDB from dbetto.catalog import Props from dspeed import build_dsp -from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo import lh5 +from ..log import build_log + def replace_list_with_array(dic): for key, value in dic.items(): diff --git a/workflow/scripts/build_evt.py b/workflow/src/legenddataflow/scripts/build_evt.py similarity index 99% rename from workflow/scripts/build_evt.py rename to workflow/src/legenddataflow/scripts/build_evt.py index 5eac164..b0bf2a4 100644 --- a/workflow/scripts/build_evt.py +++ b/workflow/src/legenddataflow/scripts/build_evt.py @@ -6,11 +6,12 @@ import lgdo.lh5 as lh5 import numpy as np from dbetto import Props, TextDB -from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo.types import Array from pygama.evt import build_evt +from ..log import build_log + sto = lh5.LH5Store() diff --git a/workflow/scripts/build_fdb.py b/workflow/src/legenddataflow/scripts/build_fdb.py similarity index 100% rename from workflow/scripts/build_fdb.py rename to workflow/src/legenddataflow/scripts/build_fdb.py diff --git a/workflow/scripts/build_hit.py b/workflow/src/legenddataflow/scripts/build_hit.py similarity index 98% rename from workflow/scripts/build_hit.py rename to workflow/src/legenddataflow/scripts/build_hit.py index f096e0c..4f31947 100644 --- a/workflow/scripts/build_hit.py +++ b/workflow/src/legenddataflow/scripts/build_hit.py @@ -3,11 +3,12 @@ from pathlib import Path from dbetto.catalog import Props -from legenddataflow.log import build_log from legendmeta import LegendMetadata, TextDB from lgdo import lh5 from pygama.hit.build_hit import build_hit +from ..log import build_log + argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) argparser.add_argument("--pars_file", help="hit pars file", nargs="*") diff --git a/workflow/scripts/build_raw_blind.py b/workflow/src/legenddataflow/scripts/build_raw_blind.py similarity index 99% rename from workflow/scripts/build_raw_blind.py rename to workflow/src/legenddataflow/scripts/build_raw_blind.py index 5d582d4..ef704dd 100644 --- a/workflow/scripts/build_raw_blind.py +++ b/workflow/src/legenddataflow/scripts/build_raw_blind.py @@ -16,10 +16,11 @@ import numexpr as ne import numpy as np from dbetto.catalog import Props -from legenddataflow.log import build_log from legendmeta 
import LegendMetadata, TextDB from lgdo import lh5 +from ..log import build_log + argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) argparser.add_argument("--output", help="output file", type=str) diff --git a/workflow/scripts/build_raw_fcio.py b/workflow/src/legenddataflow/scripts/build_raw_fcio.py similarity index 98% rename from workflow/scripts/build_raw_fcio.py rename to workflow/src/legenddataflow/scripts/build_raw_fcio.py index c3b577e..176565a 100644 --- a/workflow/scripts/build_raw_fcio.py +++ b/workflow/src/legenddataflow/scripts/build_raw_fcio.py @@ -6,7 +6,8 @@ from daq2lh5 import build_raw from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log + +from ..log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/workflow/scripts/build_raw_orca.py b/workflow/src/legenddataflow/scripts/build_raw_orca.py similarity index 98% rename from workflow/scripts/build_raw_orca.py rename to workflow/src/legenddataflow/scripts/build_raw_orca.py index c098806..899c742 100644 --- a/workflow/scripts/build_raw_orca.py +++ b/workflow/src/legenddataflow/scripts/build_raw_orca.py @@ -6,7 +6,8 @@ from daq2lh5 import build_raw from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log + +from ..log import build_log argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) diff --git a/workflow/scripts/build_skm.py b/workflow/src/legenddataflow/scripts/build_skm.py similarity index 98% rename from workflow/scripts/build_skm.py rename to workflow/src/legenddataflow/scripts/build_skm.py index be2cfb3..0463c61 100644 --- a/workflow/scripts/build_skm.py +++ b/workflow/src/legenddataflow/scripts/build_skm.py @@ -3,10 +3,11 @@ import awkward as ak from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from lgdo import lh5 from lgdo.types import Array, Struct, Table, VectorOfVectors +from ..log import build_log + def get_all_out_fields(input_table, out_fields, current_field=""): for key in input_table: diff --git a/workflow/scripts/build_tcm.py b/workflow/src/legenddataflow/scripts/build_tcm.py similarity index 97% rename from workflow/scripts/build_tcm.py rename to workflow/src/legenddataflow/scripts/build_tcm.py index 402c567..7e6ab73 100644 --- a/workflow/scripts/build_tcm.py +++ b/workflow/src/legenddataflow/scripts/build_tcm.py @@ -6,9 +6,10 @@ from daq2lh5.orca import orca_flashcam from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from pygama.evt.build_tcm import build_tcm +from ..log import build_log + argparser = argparse.ArgumentParser() argparser.add_argument("input", help="input file", type=str) argparser.add_argument("output", help="output file", type=str) diff --git a/workflow/scripts/check_blinding.py b/workflow/src/legenddataflow/scripts/check_blinding.py similarity index 98% rename from workflow/scripts/check_blinding.py rename to workflow/src/legenddataflow/scripts/check_blinding.py index 2a47172..37bf4e9 100644 --- a/workflow/scripts/check_blinding.py +++ b/workflow/src/legenddataflow/scripts/check_blinding.py @@ -16,12 +16,13 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo import lh5 from pygama.math.histogram import 
get_hist from pygama.pargen.energy_cal import get_i_local_maxima +from ..log import build_log + mpl.use("Agg") argparser = argparse.ArgumentParser() diff --git a/workflow/scripts/complete_run.py b/workflow/src/legenddataflow/scripts/complete_run.py similarity index 98% rename from workflow/scripts/complete_run.py rename to workflow/src/legenddataflow/scripts/complete_run.py index 4d5cad7..1223c5c 100644 --- a/workflow/scripts/complete_run.py +++ b/workflow/src/legenddataflow/scripts/complete_run.py @@ -7,9 +7,9 @@ import time from pathlib import Path -import legenddataflow.utils as ut -from legenddataflow import patterns -from legenddataflow.FileKey import FileKey +from ..FileKey import FileKey +from . import patterns +from . import utils as ut print("INFO: dataflow ran successfully, now few final checks and scripts") diff --git a/workflow/scripts/create_chankeylist.py b/workflow/src/legenddataflow/scripts/create_chankeylist.py similarity index 100% rename from workflow/scripts/create_chankeylist.py rename to workflow/src/legenddataflow/scripts/create_chankeylist.py diff --git a/workflow/scripts/merge_channels.py b/workflow/src/legenddataflow/scripts/merge_channels.py similarity index 99% rename from workflow/scripts/merge_channels.py rename to workflow/src/legenddataflow/scripts/merge_channels.py index 1ca2026..d0f90f7 100644 --- a/workflow/scripts/merge_channels.py +++ b/workflow/src/legenddataflow/scripts/merge_channels.py @@ -5,10 +5,11 @@ import numpy as np from dbetto.catalog import Props -from legenddataflow.FileKey import ChannelProcKey from legendmeta import LegendMetadata from lgdo import lh5 +from ..FileKey import ChannelProcKey + def replace_path(d, old_path, new_path): if isinstance(d, dict): diff --git a/workflow/scripts/par_psp_geds.py b/workflow/src/legenddataflow/scripts/par_psp_geds.py similarity index 99% rename from workflow/scripts/par_psp_geds.py rename to workflow/src/legenddataflow/scripts/par_psp_geds.py index c74ffa3..a765e64 100644 --- a/workflow/scripts/par_psp_geds.py +++ b/workflow/src/legenddataflow/scripts/par_psp_geds.py @@ -8,9 +8,10 @@ import matplotlib.pyplot as plt import numpy as np from dbetto.catalog import Props -from legenddataflow.FileKey import ChannelProcKey from legendmeta import LegendMetadata +from ..FileKey import ChannelProcKey + mpl.use("Agg") diff --git a/workflow/scripts/pars_dsp_build_svm_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py similarity index 97% rename from workflow/scripts/pars_dsp_build_svm_geds.py rename to workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py index 3b7b7ea..f6c0878 100644 --- a/workflow/scripts/pars_dsp_build_svm_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py @@ -4,10 +4,11 @@ from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from lgdo import lh5 from sklearn.svm import SVC +from ..log import build_log + argparser = argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) argparser.add_argument("--configs", help="config file", type=str) diff --git a/workflow/scripts/pars_dsp_dplms_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py similarity index 99% rename from workflow/scripts/pars_dsp_dplms_geds.py rename to workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py index 5d33fb8..f7f878e 100644 --- a/workflow/scripts/pars_dsp_dplms_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py @@ -8,11 +8,12 @@ import numpy as np 
from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo import Array, Table from pygama.pargen.dplms_ge_dict import dplms_ge_dict +from ..log import build_log + argparser = argparse.ArgumentParser() argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) diff --git a/workflow/scripts/pars_dsp_eopt_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py similarity index 99% rename from workflow/scripts/pars_dsp_eopt_geds.py rename to workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py index e59ee54..1a6f2d1 100644 --- a/workflow/scripts/pars_dsp_eopt_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py @@ -11,7 +11,6 @@ from dbetto import TextDB from dbetto.catalog import Props from dspeed.units import unit_registry as ureg -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.math.distributions import hpge_peak from pygama.pargen.dsp_optimize import ( @@ -20,6 +19,8 @@ run_one_dsp, ) +from ..log import build_log + warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/workflow/scripts/pars_dsp_evtsel_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py similarity index 99% rename from workflow/scripts/pars_dsp_evtsel_geds.py rename to workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py index dc76878..1398256 100644 --- a/workflow/scripts/pars_dsp_evtsel_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py @@ -12,11 +12,12 @@ import pygama.pargen.energy_cal as pgc from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp +from ..log import build_log + warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/scripts/pars_dsp_nopt_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py similarity index 98% rename from workflow/scripts/pars_dsp_nopt_geds.py rename to workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py index ae3aacb..9c5d5ff 100644 --- a/workflow/scripts/pars_dsp_nopt_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py @@ -8,11 +8,12 @@ import pygama.pargen.noise_optimization as pno from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp +from ..log import build_log + sto = lh5.LH5Store() argparser = argparse.ArgumentParser() diff --git a/workflow/scripts/pars_dsp_svm_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_svm_geds.py similarity index 100% rename from workflow/scripts/pars_dsp_svm_geds.py rename to workflow/src/legenddataflow/scripts/pars_dsp_svm_geds.py diff --git a/workflow/scripts/pars_dsp_tau_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py similarity index 99% rename from workflow/scripts/pars_dsp_tau_geds.py rename to workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py index 1149c69..552dd3e 100644 --- a/workflow/scripts/pars_dsp_tau_geds.py +++ 
b/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py @@ -6,12 +6,13 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.extract_tau import ExtractTau +from ..log import build_log + argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) argparser.add_argument("--metadata", help="metadata", type=str, required=True) diff --git a/workflow/scripts/pars_hit_aoe.py b/workflow/src/legenddataflow/scripts/pars_hit_aoe.py similarity index 98% rename from workflow/scripts/pars_hit_aoe.py rename to workflow/src/legenddataflow/scripts/pars_hit_aoe.py index d7fa221..40ea3c3 100644 --- a/workflow/scripts/pars_hit_aoe.py +++ b/workflow/src/legenddataflow/scripts/pars_hit_aoe.py @@ -10,14 +10,15 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.convert_np import convert_dict_np_to_float -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data +from ..convert_np import convert_dict_np_to_float +from ..log import build_log + warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/scripts/pars_hit_ecal.py b/workflow/src/legenddataflow/scripts/pars_hit_ecal.py similarity index 99% rename from workflow/scripts/pars_hit_ecal.py rename to workflow/src/legenddataflow/scripts/pars_hit_ecal.py index 8bf4f1f..9a2f3c5 100644 --- a/workflow/scripts/pars_hit_ecal.py +++ b/workflow/src/legenddataflow/scripts/pars_hit_ecal.py @@ -16,8 +16,6 @@ import pygama.math.histogram as pgh from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.convert_np import convert_dict_np_to_float -from legenddataflow.log import build_log from legendmeta import LegendMetadata from matplotlib.colors import LogNorm from pygama.math.distributions import nb_poly @@ -26,6 +24,9 @@ from pygama.pargen.utils import load_data from scipy.stats import binned_statistic +from ..convert_np import convert_dict_np_to_float +from ..log import build_log + log = logging.getLogger(__name__) mpl.use("agg") sto = lh5.LH5Store() diff --git a/workflow/scripts/pars_hit_lq.py b/workflow/src/legenddataflow/scripts/pars_hit_lq.py similarity index 95% rename from workflow/scripts/pars_hit_lq.py rename to workflow/src/legenddataflow/scripts/pars_hit_lq.py index c5f04cb..a7a2601 100644 --- a/workflow/scripts/pars_hit_lq.py +++ b/workflow/src/legenddataflow/scripts/pars_hit_lq.py @@ -9,8 +9,6 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.convert_np import convert_dict_np_to_float -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 @@ -19,6 +17,9 @@ from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data +from ..convert_np import convert_dict_np_to_float +from ..log import build_log + warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/scripts/pars_hit_qc.py 
b/workflow/src/legenddataflow/scripts/pars_hit_qc.py similarity index 98% rename from workflow/scripts/pars_hit_qc.py rename to workflow/src/legenddataflow/scripts/pars_hit_qc.py index c9d380f..c83dff7 100644 --- a/workflow/scripts/pars_hit_qc.py +++ b/workflow/src/legenddataflow/scripts/pars_hit_qc.py @@ -11,8 +11,6 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.convert_np import convert_dict_np_to_float -from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -22,6 +20,9 @@ ) from pygama.pargen.utils import load_data +from ..convert_np import convert_dict_np_to_float +from ..log import build_log + log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/scripts/pars_pht_aoecal.py b/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py similarity index 99% rename from workflow/scripts/pars_pht_aoecal.py rename to workflow/src/legenddataflow/scripts/pars_pht_aoecal.py index bbcf791..4ad0980 100644 --- a/workflow/scripts/pars_pht_aoecal.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py @@ -13,14 +13,15 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.FileKey import ChannelProcKey, ProcessingFileKey -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data +from ..FileKey import ChannelProcKey, ProcessingFileKey +from ..log import build_log + log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/scripts/pars_pht_fast.py b/workflow/src/legenddataflow/scripts/pars_pht_fast.py similarity index 98% rename from workflow/scripts/pars_pht_fast.py rename to workflow/src/legenddataflow/scripts/pars_pht_fast.py index 1dfd1d6..a807fa6 100644 --- a/workflow/scripts/pars_pht_fast.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_fast.py @@ -10,8 +10,6 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.FileKey import ChannelProcKey, ProcessingFileKey -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pars_pht_aoecal import run_aoe_calibration from pars_pht_lqcal import run_lq_calibration @@ -19,6 +17,9 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data +from ..FileKey import ChannelProcKey, ProcessingFileKey +from ..log import build_log + warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/workflow/scripts/pars_pht_lqcal.py b/workflow/src/legenddataflow/scripts/pars_pht_lqcal.py similarity index 96% rename from workflow/scripts/pars_pht_lqcal.py rename to workflow/src/legenddataflow/scripts/pars_pht_lqcal.py index 8826efd..a6a231a 100644 --- a/workflow/scripts/pars_pht_lqcal.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_lqcal.py @@ -11,8 +11,6 @@ import pandas as pd from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.FileKey import ChannelProcKey, ProcessingFileKey -from legenddataflow.log import build_log from legendmeta import LegendMetadata from 
pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 @@ -21,6 +19,9 @@ from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data +from ..FileKey import ChannelProcKey, ProcessingFileKey +from ..log import build_log + warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/scripts/pars_pht_partcal.py b/workflow/src/legenddataflow/scripts/pars_pht_partcal.py similarity index 99% rename from workflow/scripts/pars_pht_partcal.py rename to workflow/src/legenddataflow/scripts/pars_pht_partcal.py index b3e43c4..7bd8f66 100644 --- a/workflow/scripts/pars_pht_partcal.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_partcal.py @@ -13,14 +13,15 @@ import pygama.math.histogram as pgh from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.FileKey import ChannelProcKey, ProcessingFileKey -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.math.distributions import nb_poly from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data +from ..FileKey import ChannelProcKey, ProcessingFileKey +from ..log import build_log + warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/workflow/scripts/pars_pht_qc.py b/workflow/src/legenddataflow/scripts/pars_pht_qc.py similarity index 99% rename from workflow/scripts/pars_pht_qc.py rename to workflow/src/legenddataflow/scripts/pars_pht_qc.py index 2ad477a..e1cf4dd 100644 --- a/workflow/scripts/pars_pht_qc.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_qc.py @@ -11,8 +11,6 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.convert_np import convert_dict_np_to_float -from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -22,6 +20,9 @@ ) from pygama.pargen.utils import load_data +from ..convert_np import convert_dict_np_to_float +from ..log import build_log + log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/scripts/pars_pht_qc_phy.py b/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py similarity index 98% rename from workflow/scripts/pars_pht_qc_phy.py rename to workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py index 791fa2b..b48211f 100644 --- a/workflow/scripts/pars_pht_qc_phy.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py @@ -12,8 +12,6 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.convert_np import convert_dict_np_to_float -from legenddataflow.log import build_log from legendmeta import LegendMetadata from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( @@ -21,6 +19,9 @@ get_keys, ) +from ..convert_np import convert_dict_np_to_float +from ..log import build_log + log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/scripts/pars_tcm_pulser.py b/workflow/src/legenddataflow/scripts/pars_tcm_pulser.py similarity index 97% rename from workflow/scripts/pars_tcm_pulser.py rename to workflow/src/legenddataflow/scripts/pars_tcm_pulser.py index 56700ec..5325dc1 100644 --- a/workflow/scripts/pars_tcm_pulser.py +++ 
b/workflow/src/legenddataflow/scripts/pars_tcm_pulser.py @@ -6,10 +6,11 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legenddataflow.log import build_log from legendmeta import LegendMetadata from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from ..log import build_log + argparser = argparse.ArgumentParser() argparser.add_argument("--configs", help="configs path", type=str, required=True) argparser.add_argument("--metadata", help="metadata", type=str, required=True) diff --git a/workflow/scripts/write_filelist.py b/workflow/src/legenddataflow/scripts/write_filelist.py similarity index 100% rename from workflow/scripts/write_filelist.py rename to workflow/src/legenddataflow/scripts/write_filelist.py From 69b13ab854090e2e7c9fa38f8b330a86689cd15b Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 16:08:03 +0100 Subject: [PATCH 071/101] move package version spec to pyproject.toml and implement --editable --- config-lngs.yaml | 7 ------ pyproject.toml | 18 +++++++-------- workflow/src/legenddataflow/execenv.py | 31 +++++++++----------------- 3 files changed, 18 insertions(+), 38 deletions(-) diff --git a/config-lngs.yaml b/config-lngs.yaml index 356aa4e..5b04441 100644 --- a/config-lngs.yaml +++ b/config-lngs.yaml @@ -69,11 +69,4 @@ execenv: PYGAMA_FASTMATH: "false" DISABLE_TQDM: "True" -pkg_versions: - - pygama==2.0.* - - pylegendmeta==1.2.0a2 - - dspeed==1.6.* - - legend-pydataobj>=1.11.4 - - legend-daq2lh5==1.4.* - legend_metadata_version: v0.5.7 diff --git a/pyproject.toml b/pyproject.toml index 27e62e5..97b612a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,30 +52,28 @@ readme = "README.md" requires-python = ">=3.11" dynamic = ["version"] -# these are the dependencies strictly required by legend-dataflow dependencies = [ "colorlog", "dbetto>=1.0.5", + "pygama>=2", + "dspeed>=1.6", "pylegendmeta==1.2.0a2", + "legend-pydataobj>=1.11.4", + "legend-daq2lh5>=1.4", ] [project.optional-dependencies] # these are needed to run the data production -dataprod = [ +runprod = [ "snakemake>=8.16", - "pygama", - "dspeed", - "pylegendmeta==1.2.0a2", - "legend-pydataobj", - "legend-daq2lh5", ] test = [ - "legend_dataflow[dataprod]", + "legend_dataflow[runprod]", "pytest>=6", "pytest-cov>=3", ] dev = [ - "legend_dataflow[dataprod,test]", + "legend_dataflow[runprod,test]", "pre-commit", ] docs = [ @@ -88,4 +86,4 @@ docs = [ ] [project.scripts] -dataprod = "legenddataflow.library.execenv:dataprod" +dataprod = "legenddataflow.execenv:dataprod" diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index ad54d11..6fdde1d 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -11,7 +11,6 @@ import colorlog import dbetto from dbetto import AttrsDict -from packaging.requirements import Requirement from . 
import utils @@ -102,6 +101,12 @@ def dataprod() -> None: help="remove software directory before installing software", action="store_true", ) + parser_install.add_argument( + "-e", + "--editable", + help="install software with pip's --editable flag", + action="store_true", + ) parser_install.set_defaults(func=install) parser_exec = subparsers.add_parser( @@ -218,25 +223,7 @@ def _runcmd(cmd_expr, cmd_env, **kwargs): log.info("installing uv") _runcmd(cmd_expr, cmd_env) - # now packages - - path_src = Path(config_dict.paths.src) - pkg_list = [] - for spec in config_dict.pkg_versions: - pkg = Requirement(spec).name - if (path_src / pkg).exists(): - pkg_list.append(str(path_src / pkg)) - else: - pkg_list.append(spec) - - cmd_base = [*python, "-m", "uv", "pip", "--no-cache", "install"] - - cmd_expr = cmd_base + pkg_list - - log.info("installing packages") - _runcmd(cmd_expr, cmd_env) - - # and finally legenddataflow + # and finally install legenddataflow with all dependencies cmd_expr = [ *python, @@ -245,10 +232,12 @@ def _runcmd(cmd_expr, cmd_env, **kwargs): "pip", "--no-cache", "install", - # "--editable", # TODO do we really want this? str(config_loc), ] + if args.editable: + cmd_expr.insert(-1, "--editable") + log.info("installing packages") _runcmd(cmd_expr, cmd_env) From 8e5e4a65187bbd541f29c8d40336ea466295a0d4 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 16:50:21 +0100 Subject: [PATCH 072/101] fix pre-commit config --- .pre-commit-config.yaml | 13 +- pyproject.toml | 78 ++++++++++- tests/test_util.py | 25 ++-- workflow/src/legenddataflow/FileKey.py | 12 +- workflow/src/legenddataflow/cal_grouping.py | 16 ++- workflow/src/legenddataflow/convert_np.py | 3 +- .../src/legenddataflow/create_pars_keylist.py | 13 +- workflow/src/legenddataflow/execenv.py | 16 ++- workflow/src/legenddataflow/pars_loading.py | 8 +- workflow/src/legenddataflow/patterns.py | 60 +++++++-- .../scripts/blinding_calibration.py | 10 +- .../src/legenddataflow/scripts/build_dsp.py | 17 ++- .../src/legenddataflow/scripts/build_evt.py | 8 +- .../src/legenddataflow/scripts/build_fdb.py | 4 +- .../src/legenddataflow/scripts/build_hit.py | 4 +- .../legenddataflow/scripts/build_raw_blind.py | 32 +++-- .../legenddataflow/scripts/build_raw_orca.py | 36 ++++-- .../src/legenddataflow/scripts/build_skm.py | 4 +- .../legenddataflow/scripts/check_blinding.py | 21 ++- .../legenddataflow/scripts/complete_run.py | 48 +++++-- .../legenddataflow/scripts/merge_channels.py | 8 +- .../legenddataflow/scripts/par_psp_geds.py | 24 +++- .../scripts/pars_dsp_build_svm_geds.py | 4 +- .../scripts/pars_dsp_dplms_geds.py | 22 +++- .../scripts/pars_dsp_eopt_geds.py | 24 +++- .../scripts/pars_dsp_evtsel_geds.py | 91 ++++++++++--- .../scripts/pars_dsp_nopt_geds.py | 8 +- .../scripts/pars_dsp_tau_geds.py | 18 ++- .../legenddataflow/scripts/pars_hit_aoe.py | 11 +- .../legenddataflow/scripts/pars_hit_ecal.py | 85 ++++++++---- .../src/legenddataflow/scripts/pars_hit_qc.py | 44 +++++-- .../legenddataflow/scripts/pars_pht_aoecal.py | 64 ++++++--- .../legenddataflow/scripts/pars_pht_fast.py | 28 +++- .../legenddataflow/scripts/pars_pht_lqcal.py | 38 ++++-- .../scripts/pars_pht_partcal.py | 121 +++++++++++------- .../src/legenddataflow/scripts/pars_pht_qc.py | 31 ++++- .../legenddataflow/scripts/pars_pht_qc_phy.py | 26 +++- .../legenddataflow/scripts/pars_tcm_pulser.py | 4 +- workflow/src/legenddataflow/utils.py | 23 +++- 39 files changed, 817 insertions(+), 285 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml 
index e369b65..0611a74 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,6 +4,12 @@ ci: autofix_commit_msg: "style: pre-commit fixes" repos: + - repo: https://github.com/adamchainz/blacken-docs + rev: "1.19.1" + hooks: + - id: blacken-docs + additional_dependencies: [black==24.*] + - repo: https://github.com/pre-commit/pre-commit-hooks rev: "v5.0.0" hooks: @@ -24,17 +30,12 @@ repos: - id: requirements-txt-fixer - id: trailing-whitespace - - repo: https://github.com/psf/black - rev: "24.10.0" - hooks: - - id: black-jupyter - args: ["--line-length", "99"] - - repo: https://github.com/astral-sh/ruff-pre-commit rev: "v0.8.6" hooks: - id: ruff args: ["--fix", "--show-fixes"] + - id: ruff-format - repo: https://github.com/abravalheri/validate-pyproject rev: "v0.23" diff --git a/pyproject.toml b/pyproject.toml index 97b612a..807e71b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,3 @@ -[tool.uv.workspace] -exclude = ["generated", "inputs", "software", "workflow"] - [build-system] requires = [ "setuptools>=61.2", @@ -87,3 +84,78 @@ docs = [ [project.scripts] dataprod = "legenddataflow.execenv:dataprod" + +[tool.uv.workspace] +exclude = ["generated", "inputs", "software", "workflow"] + +[tool.uv] +dev-dependencies = [ + "legend-dataflow[test]", +] + +[tool.pytest.ini_options] +minversion = "6.0" +addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] +xfail_strict = true +filterwarnings = [ + "error", +] +log_cli_level = "INFO" +testpaths = [ + "tests", +] + +[tool.ruff] +src = ["workflow/src"] + +[tool.ruff.lint] +extend-select = [ + "ARG", # flake8-unused-arguments + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "EM", # flake8-errmsg + "EXE", # flake8-executable + "G", # flake8-logging-format + "I", # isort + "ICN", # flake8-import-conventions + "NPY", # NumPy specific rules + "PD", # pandas-vet + "PGH", # pygrep-hooks + "PIE", # flake8-pie + "PL", # pylint + "PT", # flake8-pytest-style + "PTH", # flake8-use-pathlib + "RET", # flake8-return + "RUF", # Ruff-specific + "SIM", # flake8-simplify + "T20", # flake8-print + "UP", # pyupgrade + "YTT", # flake8-2020 +] +ignore = [ + "PT011", # `pytest.raises(ValueError)` is too broad + "PLR09", # Too many <...> + "PLR2004", # Magic value used in comparison + "ISC001", # Conflicts with formatter +] +isort.required-imports = ["from __future__ import annotations"] + +[tool.ruff.lint.per-file-ignores] +"tests/**" = ["T20"] +"noxfile.py" = ["T20"] + + +[tool.pylint] +py-version = "3.9" +ignore-paths = [".*/_version.py"] +reports.output-format = "colorized" +similarities.ignore-imports = "yes" +messages_control.disable = [ + "design", + "fixme", + "line-too-long", + "missing-module-docstring", + "missing-function-docstring", + "wrong-import-position", + "too-many-nested-blocks" +] diff --git a/tests/test_util.py b/tests/test_util.py index acaf609..c1654e0 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -39,7 +39,9 @@ def test_filekey(): assert key.name == "l200-p00-r000-cal-*" key = FileKey.from_string("l200-p00-r000-cal-20230101T123456Z") assert key.name == "l200-p00-r000-cal-20230101T123456Z" - key = FileKey.get_filekey_from_filename("l200-p00-r000-cal-20230101T123456Z-tier_dsp.lh5") + key = FileKey.get_filekey_from_filename( + "l200-p00-r000-cal-20230101T123456Z-tier_dsp.lh5" + ) assert key.name == "l200-p00-r000-cal-20230101T123456Z" assert ( key.get_path_from_filekey(get_pattern_tier_dsp(setup))[0] @@ -57,7 +59,8 @@ def test_filekey(): def test_create_pars_keylist(): key1 = 
FileKey("l200", "p00", "r000", "cal", "20230101T123456Z") assert ( - pars_key_resolve.from_filekey(key1, {"cal": ["par_dsp"]}).valid_from == "20230101T123456Z" + pars_key_resolve.from_filekey(key1, {"cal": ["par_dsp"]}).valid_from + == "20230101T123456Z" ) key2 = FileKey("l200", "p00", "r000", "cal", "20230102T123456Z") assert pars_key_resolve.match_keys(key1, key2) == key1 @@ -68,7 +71,9 @@ def test_create_pars_keylist(): pkey2 = pars_key_resolve.from_filekey( FileKey("l200", "p00", "r000", "lar", "20230102T123456Z"), {"lar": ["par_dsp"]} ) - assert pkey2.apply == ["lar/p00/r000/l200-p00-r000-lar-20230102T123456Z-par_dsp.json"] + assert pkey2.apply == [ + "lar/p00/r000/l200-p00-r000-lar-20230102T123456Z-par_dsp.json" + ] pars_key_resolve.match_entries(pkey1, pkey2) assert set(pkey2.apply) == { "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", @@ -95,9 +100,9 @@ def test_create_pars_keylist(): pkeylist = pars_key_resolve.generate_par_keylist(keylist) assert pkeylist == keylist assert set( - pars_key_resolve.match_all_entries(pkeylist, {"cal": ["par_dsp"], "lar": ["par_dsp"]})[ - 1 - ].apply + pars_key_resolve.match_all_entries( + pkeylist, {"cal": ["par_dsp"], "lar": ["par_dsp"]} + )[1].apply ) == { "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", "lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.json", @@ -108,7 +113,9 @@ def test_pars_loading(): pars_files = CalibCatalog.get_calib_files( Path(par_dsp_path(setup)) / "validity.jsonl", "20230101T123456Z" ) - assert pars_files == ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"] + assert pars_files == [ + "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json" + ] par_override_files = CalibCatalog.get_calib_files( Path(par_overwrite_path(setup)) / "dsp" / "validity.jsonl", "20230101T123456Z" @@ -118,7 +125,9 @@ def test_pars_loading(): pars_files, par_override_files ) - assert pars_files == ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"] + assert pars_files == [ + "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json" + ] assert set(pars_catalog.get_par_file(setup, "20230101T123456Z", "dsp")) == { ( diff --git a/workflow/src/legenddataflow/FileKey.py b/workflow/src/legenddataflow/FileKey.py index 63a1842..c11e6e5 100644 --- a/workflow/src/legenddataflow/FileKey.py +++ b/workflow/src/legenddataflow/FileKey.py @@ -31,7 +31,9 @@ def regex_from_filepattern(filepattern): return "".join(f) -class FileKey(namedtuple("FileKey", ["experiment", "period", "run", "datatype", "timestamp"])): +class FileKey( + namedtuple("FileKey", ["experiment", "period", "run", "datatype", "timestamp"]) +): __slots__ = () re_pattern = "(-(?P[^-]+)(\\-(?P[^-]+)(\\-(?P[^-]+)(\\-(?P[^-]+)(\\-(?P[^-]+))?)?)?)?)?$" @@ -110,7 +112,9 @@ def get_path_from_filekey(self, pattern, **kwargs): for entry, value in kwargs.items(): if isinstance(value, dict): if len(next(iter(set(value).intersection(self._list())))) > 0: - kwargs[entry] = value[next(iter(set(value).intersection(self._list())))] + kwargs[entry] = value[ + next(iter(set(value).intersection(self._list()))) + ] else: kwargs.pop(entry) return self.expand(pattern, **kwargs) @@ -185,7 +189,9 @@ def get_path_from_filekey(self, pattern, **kwargs): for entry, value in kwargs.items(): if isinstance(value, dict): if len(next(iter(set(value).intersection(self._list())))) > 0: - kwargs[entry] = value[next(iter(set(value).intersection(self._list())))] + kwargs[entry] = value[ + next(iter(set(value).intersection(self._list()))) + ] else: 
kwargs.pop(entry) return self.expand(pattern, **kwargs) diff --git a/workflow/src/legenddataflow/cal_grouping.py b/workflow/src/legenddataflow/cal_grouping.py index e41d5c7..ce06c1d 100644 --- a/workflow/src/legenddataflow/cal_grouping.py +++ b/workflow/src/legenddataflow/cal_grouping.py @@ -79,9 +79,9 @@ def get_par_files( for par_file in par_files: if ( par_file.split("-")[-1] - == str(get_pattern_pars(self.setup, tier, check_in_cycle=False).name).split( - "-" - )[-1] + == str( + get_pattern_pars(self.setup, tier, check_in_cycle=False).name + ).split("-")[-1] ): all_par_files.append(par_file) if channel == "default": @@ -132,9 +132,9 @@ def get_plt_files( for par_file in par_files: if ( par_file.split("-")[-1] - == str(get_pattern_pars(self.setup, tier, check_in_cycle=False).name).split( - "-" - )[-1] + == str( + get_pattern_pars(self.setup, tier, check_in_cycle=False).name + ).split("-")[-1] ): all_par_files.append(par_file) if channel == "default": @@ -190,7 +190,9 @@ def get_log_file( fk.channel = channel return fk.get_path_from_filekey(get_pattern_log_channel(self.setup, name))[0] - def get_timestamp(self, catalog, dataset, channel, tier, experiment="l200", datatype="cal"): + def get_timestamp( + self, catalog, dataset, channel, tier, experiment="l200", datatype="cal" + ): par_files = self.get_par_files( catalog, dataset, diff --git a/workflow/src/legenddataflow/convert_np.py b/workflow/src/legenddataflow/convert_np.py index cdc363c..dbd8978 100644 --- a/workflow/src/legenddataflow/convert_np.py +++ b/workflow/src/legenddataflow/convert_np.py @@ -9,6 +9,7 @@ def convert_dict_np_to_float(dic): dic[key] = float(dic[key]) elif isinstance(dic[key], (list, tuple)): dic[key] = [ - float(x) if isinstance(x, (np.float32, np.float64)) else x for x in dic[key] + float(x) if isinstance(x, (np.float32, np.float64)) else x + for x in dic[key] ] return dic diff --git a/workflow/src/legenddataflow/create_pars_keylist.py b/workflow/src/legenddataflow/create_pars_keylist.py index a82ef0c..9325a6d 100644 --- a/workflow/src/legenddataflow/create_pars_keylist.py +++ b/workflow/src/legenddataflow/create_pars_keylist.py @@ -15,7 +15,6 @@ class ParsKeyResolve: - def __init__(self, valid_from, category, apply): self.valid_from = valid_from self.category = category @@ -47,7 +46,9 @@ def write_to_jsonl(file_names, path): @staticmethod def write_to_yaml(file_names, path): with Path(path).open("w") as of: - yaml.dump([file_name.__dict__ for file_name in file_names], of, sort_keys=False) + yaml.dump( + [file_name.__dict__ for file_name in file_names], of, sort_keys=False + ) @staticmethod def match_keys(key1, key2): @@ -79,7 +80,9 @@ def generate_par_keylist(keys): @staticmethod def match_entries(entry1, entry2): - datatype2 = ProcessingFileKey.get_filekey_from_filename(entry2.apply[0]).datatype + datatype2 = ProcessingFileKey.get_filekey_from_filename( + entry2.apply[0] + ).datatype for entry in entry1.apply: if ProcessingFileKey.get_filekey_from_filename(entry).datatype == datatype2: pass @@ -105,7 +108,9 @@ def get_keys(keypart, search_pattern): else: wildcard_dict = d._asdict() try: - tier_pattern_rx = re.compile(smk.io.regex_from_filepattern(str(search_pattern))) + tier_pattern_rx = re.compile( + smk.io.regex_from_filepattern(str(search_pattern)) + ) except AttributeError: tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) fn_glob_pattern = smk.io.expand(search_pattern, **wildcard_dict)[0] diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index 
6fdde1d..6a0239d 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -88,13 +88,17 @@ def dataprod() -> None: prog="dataprod", description="dataprod's command-line interface" ) - parser.add_argument("-v", "--verbose", help="increase verbosity", action="store_true") + parser.add_argument( + "-v", "--verbose", help="increase verbosity", action="store_true" + ) subparsers = parser.add_subparsers() parser_install = subparsers.add_parser( "install", help="install user software in data production environment" ) - parser_install.add_argument("config_file", help="production cycle configuration file") + parser_install.add_argument( + "config_file", help="production cycle configuration file" + ) parser_install.add_argument( "-r", "--remove", @@ -112,7 +116,9 @@ def dataprod() -> None: parser_exec = subparsers.add_parser( "exec", help="load data production environment and execute a given command" ) - parser_exec.add_argument("config_file", help="production cycle configuration file", type=str) + parser_exec.add_argument( + "config_file", help="production cycle configuration file", type=str + ) parser_exec.add_argument( "command", help="command to run within the container", type=str, nargs="+" ) @@ -123,7 +129,9 @@ def dataprod() -> None: if args.verbose: handler = colorlog.StreamHandler() handler.setFormatter( - colorlog.ColoredFormatter("%(log_color)s%(name)s [%(levelname)s] %(message)s") + colorlog.ColoredFormatter( + "%(log_color)s%(name)s [%(levelname)s] %(message)s" + ) ) logger = logging.getLogger("legenddataflow") diff --git a/workflow/src/legenddataflow/pars_loading.py b/workflow/src/legenddataflow/pars_loading.py index 80f54a6..bd23011 100644 --- a/workflow/src/legenddataflow/pars_loading.py +++ b/workflow/src/legenddataflow/pars_loading.py @@ -20,7 +20,10 @@ def match_pars_files(filelist1, filelist2): fk2 = ProcessingFileKey.get_filekey_from_pattern(file2) for j, file1 in enumerate(filelist1): fk1 = ProcessingFileKey.get_filekey_from_pattern(file1) - if fk1.processing_step == fk2.processing_step and fk1.datatype == fk2.datatype: + if ( + fk1.processing_step == fk2.processing_step + and fk1.datatype == fk2.datatype + ): filelist1[j] = file2 if len(filelist2) > 1: filelist2.remove(file2) @@ -41,7 +44,8 @@ def get_par_file(setup, timestamp, tier): pars_files = [Path(get_pars_path(setup, tier)) / file for file in pars_files] if len(pars_files_overwrite) > 0: pars_overwrite_files = [ - Path(par_overwrite_path(setup)) / tier / file for file in pars_files_overwrite + Path(par_overwrite_path(setup)) / tier / file + for file in pars_files_overwrite ] pars_files += pars_overwrite_files return pars_files diff --git a/workflow/src/legenddataflow/patterns.py b/workflow/src/legenddataflow/patterns.py index 28d27db..71f5db4 100644 --- a/workflow/src/legenddataflow/patterns.py +++ b/workflow/src/legenddataflow/patterns.py @@ -46,7 +46,9 @@ def processing_overwrite_pattern(): def full_channel_pattern(): - return "{experiment}-{period}-{run}-{datatype}-{timestamp}-{channel}-{processing_step}" + return ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}-{channel}-{processing_step}" + ) def full_channel_pattern_with_extension(): @@ -89,7 +91,10 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): / "{datatype}" / "{period}" / "{run}" - / ("{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + f"{tier}.lh5") + / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}-tier_" + + f"{tier}.lh5" + ) ) elif tier in ["evt_concat", "pet_concat"]: 
file_pattern = ( @@ -107,7 +112,10 @@ def get_pattern_tier(setup, tier, check_in_cycle=True): else: msg = "invalid tier" raise Exception(msg) - if tier_path(setup) not in str(file_pattern.resolve(strict=False)) and check_in_cycle is True: + if ( + tier_path(setup) not in str(file_pattern.resolve(strict=False)) + and check_in_cycle is True + ): return "/tmp/" + file_pattern.name else: return file_pattern @@ -132,7 +140,10 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr / "cal" / "{period}" / "{run}" - / ("{experiment}-{period}-{run}-cal-{timestamp}-par_" + f"{tier}.{extension}") + / ( + "{experiment}-{period}-{run}-cal-{timestamp}-par_" + + f"{tier}.{extension}" + ) ) else: msg = "invalid tier" @@ -142,7 +153,10 @@ def get_pattern_pars(setup, tier, name=None, extension="yaml", check_in_cycle=Tr and check_in_cycle is True ): if name is None: - return "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}.{extension}" + return ( + "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" + + f"par_{tier}.{extension}" + ) else: return ( "/tmp/{experiment}-{period}-{run}-cal-{timestamp}-" @@ -160,7 +174,10 @@ def get_pattern_pars_inputs(setup, tier, name=None, ext="yaml"): / "cal" / "{period}" / "{run}" - / ("{experiment}-{period}-{run}-cal-{timestamp}-" + f"par_{tier}_{name}.{ext}") + / ( + "{experiment}-{period}-{run}-cal-{timestamp}-" + + f"par_{tier}_{name}.{ext}" + ) ) else: return ( @@ -206,7 +223,10 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" datatype = "{datatype}" if name is None: return Path(f"{tmp_par_path(setup)}") / ( - "{experiment}-{period}-{run}-" + datatype + "-{timestamp}-par_" + f"{tier}.{extension}" + "{experiment}-{period}-{run}-" + + datatype + + "-{timestamp}-par_" + + f"{tier}.{extension}" ) else: return Path(f"{tmp_par_path(setup)}") / ( @@ -220,7 +240,8 @@ def get_pattern_pars_tmp(setup, tier, name=None, datatype=None, extension="yaml" def get_pattern_pars_tmp_channel(setup, tier, name=None, extension="yaml"): if name is None: return Path(f"{tmp_par_path(setup)}") / ( - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + f"{tier}.{extension}" + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-par_" + + f"{tier}.{extension}" ) else: return Path(f"{tmp_par_path(setup)}") / ( @@ -236,7 +257,8 @@ def get_pattern_plts_tmp_channel(setup, tier, name=None): ) else: return Path(f"{tmp_plts_path(setup)}") / ( - "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + f"{tier}_{name}.pkl" + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-plt_" + + f"{tier}_{name}.pkl" ) @@ -257,7 +279,13 @@ def get_pattern_plts(setup, tier, name=None): / "cal" / "{period}" / "{run}" - / ("{experiment}-{period}-{run}-cal-{timestamp}-plt_" + tier + "_" + name + ".dir") + / ( + "{experiment}-{period}-{run}-cal-{timestamp}-plt_" + + tier + + "_" + + name + + ".dir" + ) ) @@ -265,7 +293,11 @@ def get_pattern_log(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / ("{experiment}-{period}-{run}-{datatype}-{timestamp}-" + processing_step + ".log") + / ( + "{experiment}-{period}-{run}-{datatype}-{timestamp}-" + + processing_step + + ".log" + ) ) @@ -273,7 +305,11 @@ def get_pattern_log_channel(setup, processing_step): return ( Path(f"{tmp_log_path(setup)}") / processing_step - / ("{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + processing_step + ".log") + / ( + "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" + + processing_step + + ".log" + ) 
) diff --git a/workflow/src/legenddataflow/scripts/blinding_calibration.py b/workflow/src/legenddataflow/scripts/blinding_calibration.py index 4a666cc..e4b79f2 100644 --- a/workflow/src/legenddataflow/scripts/blinding_calibration.py +++ b/workflow/src/legenddataflow/scripts/blinding_calibration.py @@ -50,7 +50,9 @@ # if chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["is_blinded"] is True: pars_dict = {} # peaks to search for -peaks_keV = np.array([238, 583.191, 727.330, 860.564, 1592.53, 1620.50, 2103.53, 2614.50]) +peaks_keV = np.array( + [238, 583.191, 727.330, 860.564, 1592.53, 1620.50, 2103.53, 2614.50] +) E_uncal = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[0].view_as("np") E_uncal = E_uncal[E_uncal > 200] @@ -98,7 +100,11 @@ ax.set_ylabel("counts") ax.set_yscale("log") ax2 = plt.subplot(212) -ax2.hist(E_uncal * roughpars[0], bins=np.arange(2600, 2630, 1 * roughpars[0]), histtype="step") +ax2.hist( + E_uncal * roughpars[0], + bins=np.arange(2600, 2630, 1 * roughpars[0]), + histtype="step", +) ax2.set_xlabel("energy (keV)") ax2.set_ylabel("counts") plt.suptitle(args.channel) diff --git a/workflow/src/legenddataflow/scripts/build_dsp.py b/workflow/src/legenddataflow/scripts/build_dsp.py index 137eb1f..7e44bb6 100644 --- a/workflow/src/legenddataflow/scripts/build_dsp.py +++ b/workflow/src/legenddataflow/scripts/build_dsp.py @@ -36,7 +36,9 @@ def replace_list_with_array(dic): argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--tier", help="Tier", type=str, required=True) -argparser.add_argument("--pars_file", help="database file for detector", nargs="*", default=[]) +argparser.add_argument( + "--pars_file", help="database file for detector", nargs="*", default=[] +) argparser.add_argument("--input", help="input file", type=str) argparser.add_argument("--output", help="output file", type=str) @@ -74,7 +76,9 @@ def replace_list_with_array(dic): for chan, file in channel_dict.items() } db_files = [ - par_file for par_file in args.pars_file if Path(par_file).suffix in (".json", ".yaml", ".yml") + par_file + for par_file in args.pars_file + if Path(par_file).suffix in (".json", ".yaml", ".yml") ] database_dic = Props.read_from(db_files, subst_pathvar=True) @@ -105,9 +109,12 @@ def replace_list_with_array(dic): key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") if args.tier in ["dsp", "psp"]: - - raw_channels = [channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel)] - raw_fields = [field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/")] + raw_channels = [ + channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel) + ] + raw_fields = [ + field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/") + ] outputs = {} channels = [] diff --git a/workflow/src/legenddataflow/scripts/build_evt.py b/workflow/src/legenddataflow/scripts/build_evt.py index b0bf2a4..b4723b4 100644 --- a/workflow/src/legenddataflow/scripts/build_evt.py +++ b/workflow/src/legenddataflow/scripts/build_evt.py @@ -53,7 +53,9 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): # load in config configs = TextDB(args.configs, lazy=True) if args.tier in ("evt", "pet"): - rule_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_evt"] + rule_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "tier_evt" + ] else: msg = "unknown tier" @@ -74,7 +76,9 @@ def 
find_matching_values_with_delay(arr1, arr2, jit_delay): exp_string = exp_string.replace( 'xtalk_matrix_filename=""', f'xtalk_matrix_filename="{args.xtc_file}"' ) - exp_string = exp_string.replace('cal_par_files=""', f"cal_par_files={args.par_files}") + exp_string = exp_string.replace( + 'cal_par_files=""', f"cal_par_files={args.par_files}" + ) exp_string2 = exp_string.replace('return_mode="energy"', 'return_mode="tcm_index"') file_path_config = { diff --git a/workflow/src/legenddataflow/scripts/build_fdb.py b/workflow/src/legenddataflow/scripts/build_fdb.py index f628341..93a3567 100644 --- a/workflow/src/legenddataflow/scripts/build_fdb.py +++ b/workflow/src/legenddataflow/scripts/build_fdb.py @@ -41,7 +41,9 @@ timestamps = np.zeros(len(fdb.df), dtype="float64") for i, row in enumerate(fdb.df.itertuples()): - store = lh5.LH5Store(base_path=f"{fdb.data_dir}/{fdb.tier_dirs['raw']}", keep_open=True) + store = lh5.LH5Store( + base_path=f"{fdb.data_dir}/{fdb.tier_dirs['raw']}", keep_open=True + ) # list of first timestamps for each channel loc_timestamps = np.full(len(row.raw_tables), fill_value=default, dtype="float64") diff --git a/workflow/src/legenddataflow/scripts/build_hit.py b/workflow/src/legenddataflow/scripts/build_hit.py index 4f31947..47b0fa0 100644 --- a/workflow/src/legenddataflow/scripts/build_hit.py +++ b/workflow/src/legenddataflow/scripts/build_hit.py @@ -27,7 +27,9 @@ configs = TextDB(args.configs, lazy=True) if args.tier == "hit" or args.tier == "pht": - config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_hit"] + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "tier_hit" + ] else: msg = "unknown tier" raise ValueError(msg) diff --git a/workflow/src/legenddataflow/scripts/build_raw_blind.py b/workflow/src/legenddataflow/scripts/build_raw_blind.py index ef704dd..3d42717 100644 --- a/workflow/src/legenddataflow/scripts/build_raw_blind.py +++ b/workflow/src/legenddataflow/scripts/build_raw_blind.py @@ -36,7 +36,9 @@ args = argparser.parse_args() configs = TextDB(args.configs, lazy=True) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "tier_raw" +] log = build_log(config_dict, args.log) @@ -53,19 +55,29 @@ # list of Ge channels and SiPM channels with associated metadata legendmetadata = LegendMetadata(args.metadata, lazy=True) ged_channels = ( - legendmetadata.channelmap(args.timestamp).map("system", unique=False)["geds"].map("daq.rawid") + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["geds"] + .map("daq.rawid") ) spms_channels = ( - legendmetadata.channelmap(args.timestamp).map("system", unique=False)["spms"].map("daq.rawid") + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["spms"] + .map("daq.rawid") ) auxs_channels = ( - legendmetadata.channelmap(args.timestamp).map("system", unique=False)["auxs"].map("daq.rawid") + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["auxs"] + .map("daq.rawid") ) blsn_channels = ( - legendmetadata.channelmap(args.timestamp).map("system", unique=False)["bsln"].map("daq.rawid") + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["bsln"] + .map("daq.rawid") ) puls_channels = ( - legendmetadata.channelmap(args.timestamp).map("system", unique=False)["puls"].map("daq.rawid") + legendmetadata.channelmap(args.timestamp) + .map("system", 
unique=False)["puls"] + .map("daq.rawid") ) store = lh5.LH5Store() @@ -88,7 +100,9 @@ # calibrate daq energy using pre existing curve daqenergy_cal = ne.evaluate( blind_curve["daqenergy_cal"]["expression"], - local_dict=dict(daqenergy=daqenergy, **blind_curve["daqenergy_cal"]["parameters"]), + local_dict=dict( + daqenergy=daqenergy, **blind_curve["daqenergy_cal"]["parameters"] + ), ) # figure out which event indices should be blinded @@ -148,7 +162,9 @@ # the rest should be the Ge and SiPM channels that need to be blinded # read in all of the data but only for the unblinded events - blinded_chobj, _ = store.read(channel + "/raw", args.input, idx=tokeep, decompress=False) + blinded_chobj, _ = store.read( + channel + "/raw", args.input, idx=tokeep, decompress=False + ) # now write the blinded data for this channel store.write_object( diff --git a/workflow/src/legenddataflow/scripts/build_raw_orca.py b/workflow/src/legenddataflow/scripts/build_raw_orca.py index 899c742..72b5ac6 100644 --- a/workflow/src/legenddataflow/scripts/build_raw_orca.py +++ b/workflow/src/legenddataflow/scripts/build_raw_orca.py @@ -25,7 +25,9 @@ Path(args.output).parent.mkdir(parents=True, exist_ok=True) configs = TextDB(args.configs, lazy=True) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"]["tier_raw"] +config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "tier_raw" +] log = build_log(config_dict, args.log) @@ -40,7 +42,9 @@ ged_config = Props.read_from(channel_dict["geds_config"]) ged_channels = list( - chmap.channelmaps.on(args.timestamp).map("system", unique=False)["geds"].map("daq.rawid") + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["geds"] + .map("daq.rawid") ) ged_config[next(iter(ged_config))]["geds"]["key_list"] = sorted(ged_channels) @@ -50,7 +54,9 @@ spm_config = Props.read_from(channel_dict["spms_config"]) spm_channels = list( - chmap.channelmaps.on(args.timestamp).map("system", unique=False)["spms"].map("daq.rawid") + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["spms"] + .map("daq.rawid") ) spm_config[next(iter(spm_config))]["spms"]["key_list"] = sorted(spm_channels) @@ -59,25 +65,37 @@ if "auxs_config" in list(channel_dict): aux_config = Props.read_from(channel_dict["auxs_config"]) aux_channels = list( - chmap.channelmaps.on(args.timestamp).map("system", unique=False)["auxs"].map("daq.rawid") + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["auxs"] + .map("daq.rawid") ) aux_channels += list( - chmap.channelmaps.on(args.timestamp).map("system", unique=False)["puls"].map("daq.rawid") + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["puls"] + .map("daq.rawid") ) aux_channels += list( - chmap.channelmaps.on(args.timestamp).map("system", unique=False)["bsln"].map("daq.rawid") + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["bsln"] + .map("daq.rawid") ) top_key = next(iter(aux_config)) - aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted(aux_channels) + aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted( + aux_channels + ) Props.add_to(all_config, aux_config) if "muon_config" in list(channel_dict): muon_config = Props.read_from(channel_dict["muon_config"]) muon_channels = list( - chmap.channelmaps.on(args.timestamp).map("system", unique=False)["muon"].map("daq.rawid") + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["muon"] + .map("daq.rawid") ) top_key = 
next(iter(muon_config)) - muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted(muon_channels) + muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted( + muon_channels + ) Props.add_to(all_config, muon_config) rng = np.random.default_rng() diff --git a/workflow/src/legenddataflow/scripts/build_skm.py b/workflow/src/legenddataflow/scripts/build_skm.py index 0463c61..9411b1b 100644 --- a/workflow/src/legenddataflow/scripts/build_skm.py +++ b/workflow/src/legenddataflow/scripts/build_skm.py @@ -59,7 +59,9 @@ def get_all_out_fields(input_table, out_fields, current_field=""): if isinstance(ptr1[items[-1]], Table): out_fields.remove(field) - out_fields = get_all_out_fields(ptr1[items[-1]], out_fields, current_field=field) + out_fields = get_all_out_fields( + ptr1[items[-1]], out_fields, current_field=field + ) # remove unwanted columns out_table_skm = Table(size=len(out_table)) diff --git a/workflow/src/legenddataflow/scripts/check_blinding.py b/workflow/src/legenddataflow/scripts/check_blinding.py index 37bf4e9..faf800d 100644 --- a/workflow/src/legenddataflow/scripts/check_blinding.py +++ b/workflow/src/legenddataflow/scripts/check_blinding.py @@ -29,7 +29,9 @@ argparser.add_argument("--files", help="files", nargs="*", type=str) argparser.add_argument("--output", help="output file", type=str) argparser.add_argument("--plot_file", help="plot file", type=str) -argparser.add_argument("--blind_curve", help="blinding curves file", nargs="*", type=str) +argparser.add_argument( + "--blind_curve", help="blinding curves file", nargs="*", type=str +) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--configs", help="config file", type=str) @@ -44,14 +46,18 @@ log = build_log(config_dict, args.log) # get the usability status for this channel -chmap = LegendMetadata(args.metadata, lazy=True).channelmap(args.timestamp).map("daq.rawid") +chmap = ( + LegendMetadata(args.metadata, lazy=True).channelmap(args.timestamp).map("daq.rawid") +) det_status = chmap[int(args.channel[2:])]["analysis"]["is_blinded"] # read in calibration curve for this channel blind_curve = Props.read_from(args.blind_curve)[args.channel]["pars"]["operations"] # load in the data -daqenergy = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[0].view_as("np") +daqenergy = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[0].view_as( + "np" +) # calibrate daq energy using pre existing curve daqenergy_cal = ne.evaluate( @@ -86,11 +92,16 @@ # check for peaks within +- 5keV of 2614 and 583 to ensure blinding still # valid and if so create file else raise error. 
if detector is in ac mode it # will always pass this check -if (np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5)) or det_status is False: +if ( + np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) +) or det_status is False: Path(args.output).mkdir(parents=True, exist_ok=True) Props.write_to( args.output, - {"threshold_adc": np.nanmin(daqenergy), "threshold_kev": np.nanmin(daqenergy_cal)}, + { + "threshold_adc": np.nanmin(daqenergy), + "threshold_kev": np.nanmin(daqenergy_cal), + }, ) else: msg = "peaks not found in daqenergy" diff --git a/workflow/src/legenddataflow/scripts/complete_run.py b/workflow/src/legenddataflow/scripts/complete_run.py index 1223c5c..e3892eb 100644 --- a/workflow/src/legenddataflow/scripts/complete_run.py +++ b/workflow/src/legenddataflow/scripts/complete_run.py @@ -50,9 +50,13 @@ def check_log_files(log_path, output_file, gen_output, warning_file=None): Path(file).unlink() text = None if n_errors == 0: - f.write(f"{gen_output} successfully generated at {now} with no errors \n") + f.write( + f"{gen_output} successfully generated at {now} with no errors \n" + ) if n_warnings == 0: - w.write(f"{gen_output} successfully generated at {now} with no warnings \n") + w.write( + f"{gen_output} successfully generated at {now} with no warnings \n" + ) else: with Path(output_file).open("w") as f: n_errors = 0 @@ -73,7 +77,9 @@ def check_log_files(log_path, output_file, gen_output, warning_file=None): Path(file).unlink() text = None if n_errors == 0: - f.write(f"{gen_output} successfully generated at {now} with no errors \n") + f.write( + f"{gen_output} successfully generated at {now} with no errors \n" + ) walk = list(os.walk(log_path)) for path, _, _ in walk[::-1]: if len(os.listdir(path)) == 0: @@ -139,7 +145,9 @@ def build_valid_keys(input_files_regex, output_dir): for key in list(key_dict): dtype = key.split("-")[-1] - out_file = Path(output_dir) / f'{key.replace(f"-{dtype}", "")}-valid_{dtype}.json' + out_file = ( + Path(output_dir) / f'{key.replace(f"-{dtype}", "")}-valid_{dtype}.json' + ) out_file.parent.mkdir(parents=True, exist_ok=True) if Path(out_file).is_file(): out_dict = Props.read_from([out_file] + key_dict[key]) @@ -163,7 +171,9 @@ def find_gen_runs(gen_tier_path): # then look for concat tiers (use filenames now) paths_concat = gen_tier_path.glob("*/*/*.lh5") # use the directories to build a datatype/period/run string - runs_concat = {"/".join([str(p).split("-")[3]] + str(p).split("-")[1:3]) for p in paths_concat} + runs_concat = { + "/".join([str(p).split("-")[3]] + str(p).split("-")[1:3]) for p in paths_concat + } return runs | runs_concat @@ -186,7 +196,10 @@ def build_file_dbs(gen_tier_path, outdir): outdir.mkdir(parents=True, exist_ok=True) # TODO: replace l200 with {experiment} outfile = outdir / f"l200-{speck[1]}-{speck[2]}-{speck[0]}-filedb.h5" - logfile = Path(ut.tmp_log_path(snakemake.params.setup)) / outfile.with_suffix(".log").name + logfile = ( + Path(ut.tmp_log_path(snakemake.params.setup)) + / outfile.with_suffix(".log").name + ) print(f"INFO: ......building {outfile}") cmdline = [ @@ -223,7 +236,11 @@ def build_file_dbs(gen_tier_path, outdir): for p in processes: if p.returncode != 0: - _cmdline = " ".join([f"{k}={v}" for k, v in cmdenv.items()]) + " " + " ".join(p.args) + _cmdline = ( + " ".join([f"{k}={v}" for k, v in cmdenv.items()]) + + " " + + " ".join(p.args) + ) msg = f"at least one FileDB building thread failed: {_cmdline}" raise RuntimeError(msg) @@ -234,11 +251,16 @@ def build_file_dbs(gen_tier_path, 
outdir): file_db_config = {} -if os.getenv("PRODENV") is not None and os.getenv("PRODENV") in snakemake.params.filedb_path: +if ( + os.getenv("PRODENV") is not None + and os.getenv("PRODENV") in snakemake.params.filedb_path +): prodenv = as_ro(os.getenv("PRODENV")) def tdirs(tier): - return as_ro(ut.get_tier_path(snakemake.params.setup, tier)).replace(prodenv, "") + return as_ro(ut.get_tier_path(snakemake.params.setup, tier)).replace( + prodenv, "" + ) file_db_config["data_dir"] = "$PRODENV" @@ -251,11 +273,15 @@ def tdirs(tier): file_db_config["data_dir"] = "/" -file_db_config["tier_dirs"] = {k: tdirs(k) for k in snakemake.params.setup["table_format"]} +file_db_config["tier_dirs"] = { + k: tdirs(k) for k in snakemake.params.setup["table_format"] +} def fformat(tier): - abs_path = patterns.get_pattern_tier(snakemake.params.setup, tier, check_in_cycle=False) + abs_path = patterns.get_pattern_tier( + snakemake.params.setup, tier, check_in_cycle=False + ) return str(abs_path).replace(ut.get_tier_path(snakemake.params.setup, tier), "") diff --git a/workflow/src/legenddataflow/scripts/merge_channels.py b/workflow/src/legenddataflow/scripts/merge_channels.py index d0f90f7..d6fec7a 100644 --- a/workflow/src/legenddataflow/scripts/merge_channels.py +++ b/workflow/src/legenddataflow/scripts/merge_channels.py @@ -86,7 +86,6 @@ def replace_path(d, old_path, new_path): if chmap is not None: channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" else: - channel_name = fkey.channel out_dict[channel_name] = channel_dict else: @@ -104,7 +103,6 @@ def replace_path(d, old_path, new_path): if chmap is not None: channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" else: - channel_name = fkey.channel out_dict[channel_name] = channel_dict @@ -123,7 +121,6 @@ def replace_path(d, old_path, new_path): if chmap is not None: channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" else: - channel_name = fkey.channel if isinstance(channel_dict, dict) and "common" in list(channel_dict): chan_common_dict = channel_dict.pop("common") @@ -142,7 +139,6 @@ def replace_path(d, old_path, new_path): if chmap is not None: channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" else: - channel_name = fkey.channel tb_in = lh5.read(f"{channel_name}", channel) @@ -153,7 +149,9 @@ def replace_path(d, old_path, new_path): wo_mode="a", ) if args.in_db: - db_dict[channel_name] = replace_path(db_dict[channel_name], channel, args.output) + db_dict[channel_name] = replace_path( + db_dict[channel_name], channel, args.output + ) else: msg = "Output file extension does not match input file extension" raise RuntimeError(msg) diff --git a/workflow/src/legenddataflow/scripts/par_psp_geds.py b/workflow/src/legenddataflow/scripts/par_psp_geds.py index a765e64..e65903c 100644 --- a/workflow/src/legenddataflow/scripts/par_psp_geds.py +++ b/workflow/src/legenddataflow/scripts/par_psp_geds.py @@ -16,13 +16,21 @@ argparser = argparse.ArgumentParser() -argparser.add_argument("--input", help="input files", nargs="*", type=str, required=True) -argparser.add_argument("--output", help="output file", nargs="*", type=str, required=True) -argparser.add_argument("--in_plots", help="input plot files", nargs="*", type=str, required=False) +argparser.add_argument( + "--input", help="input files", nargs="*", type=str, required=True +) +argparser.add_argument( + "--output", help="output file", nargs="*", type=str, required=True +) +argparser.add_argument( + "--in_plots", help="input plot files", nargs="*", type=str, required=False +) argparser.add_argument( 
"--out_plots", help="output plot files", nargs="*", type=str, required=False ) -argparser.add_argument("--in_obj", help="input object files", nargs="*", type=str, required=False) +argparser.add_argument( + "--in_obj", help="input object files", nargs="*", type=str, required=False +) argparser.add_argument( "--out_obj", help="output object files", nargs="*", type=str, required=False ) @@ -35,7 +43,9 @@ argparser.add_argument("--channel", help="Channel", type=str, required=True) args = argparser.parse_args() -configs = LegendMetadata(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +configs = LegendMetadata(args.configs, lazy=True).on( + args.timestamp, system=args.datatype +) merge_config = Props.read_from( configs["snakemake_rules"]["pars_psp"]["inputs"]["psp_config"][args.channel] ) @@ -96,7 +106,9 @@ val = val[key] fig = plt.figure() - plt.scatter([datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in in_dicts], vals) + plt.scatter( + [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in in_dicts], vals + ) plt.axhline(y=mean_val, color="r", linestyle="-") plt.xlabel("time") if unit is not None: diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py index f6c0878..a5310e9 100644 --- a/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py @@ -18,7 +18,9 @@ argparser.add_argument("--output_file", help="output SVM file", type=str, required=True) argparser.add_argument("--train_data", help="input data file", type=str, required=True) -argparser.add_argument("--train_hyperpars", help="input hyperparameter file", required=True) +argparser.add_argument( + "--train_hyperpars", help="input hyperparameter file", required=True +) args = argparser.parse_args() configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py index f7f878e..a47b653 100644 --- a/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py @@ -46,7 +46,9 @@ channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" -configs = LegendMetadata(args.configs, lazy=True).on(args.timestamp, system=args.datatype) +configs = LegendMetadata(args.configs, lazy=True).on( + args.timestamp, system=args.datatype +) dsp_config = config_dict["inputs"]["proc_chain"][args.channel] dplms_json = config_dict["inputs"]["dplms_pars"][args.channel] @@ -62,7 +64,9 @@ log.info("\nLoad fft data") energies = sto.read(f"{channel}/raw/daqenergy", fft_files)[0] idxs = np.where(energies.nda == 0)[0] - raw_fft = sto.read(f"{channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs)[0] + raw_fft = sto.read( + f"{channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs + )[0] t1 = time.time() log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") @@ -71,13 +75,17 @@ kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0][ + "peak" + ].nda ids = np.isin(peaks, peaks_rounded) peaks = peaks[ids] idx_list = 
[np.where(peaks == peak)[0] for peak in peaks_rounded] raw_cal = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] - log.info(f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}") + log.info( + f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}" + ) if isinstance(dsp_config, (str, list)): dsp_config = Props.read_from(dsp_config) @@ -107,9 +115,9 @@ coeffs = out_dict["dplms"].pop("coefficients") dplms_pars = Table(col_dict={"coefficients": Array(coeffs)}) - out_dict["dplms"][ - "coefficients" - ] = f"loadlh5('{args.lh5_path}', '{channel}/dplms/coefficients')" + out_dict["dplms"]["coefficients"] = ( + f"loadlh5('{args.lh5_path}', '{channel}/dplms/coefficients')" + ) log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") else: diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py index 1a6f2d1..c059961 100644 --- a/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py @@ -39,11 +39,15 @@ argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) -argparser.add_argument("--final_dsp_pars", help="final_dsp_pars", type=str, required=True) +argparser.add_argument( + "--final_dsp_pars", help="final_dsp_pars", type=str, required=True +) argparser.add_argument("--qbb_grid_path", help="qbb_grid_path", type=str) argparser.add_argument("--plot_path", help="plot_path", type=str) -argparser.add_argument("--plot_save_path", help="plot_save_path", type=str, required=False) +argparser.add_argument( + "--plot_save_path", help="plot_save_path", type=str, required=False +) args = argparser.parse_args() configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) @@ -104,7 +108,9 @@ ) peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0]["peak"].nda + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0][ + "peak" + ].nda ids = np.isin(peaks, peaks_rounded) peaks = peaks[ids] idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] @@ -275,9 +281,15 @@ bopt_trap.lambda_param = lambda_param bopt_trap.add_dimension("etrap", "rise", 1, 12, True, "us") - bopt_cusp.add_initial_values(x_init=sample_x, y_init=sample_y_cusp, yerr_init=err_y_cusp) - bopt_zac.add_initial_values(x_init=sample_x, y_init=sample_y_zac, yerr_init=err_y_zac) - bopt_trap.add_initial_values(x_init=sample_x, y_init=sample_y_trap, yerr_init=err_y_trap) + bopt_cusp.add_initial_values( + x_init=sample_x, y_init=sample_y_cusp, yerr_init=err_y_cusp + ) + bopt_zac.add_initial_values( + x_init=sample_x, y_init=sample_y_zac, yerr_init=err_y_zac + ) + bopt_trap.add_initial_values( + x_init=sample_x, y_init=sample_y_trap, yerr_init=err_y_trap + ) best_idx = np.nanargmin(sample_y_cusp) bopt_cusp.optimal_results = results_cusp[best_idx] diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py index 1398256..2c01421 100644 --- a/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py @@ -71,7 +71,9 @@ def get_out_data( "baseline": lgdo.Array(raw_data["baseline"].nda[final_mask]), "daqenergy": lgdo.Array(raw_data["daqenergy"].nda[final_mask]), "daqenergy_cal": 
lgdo.Array(raw_data["daqenergy_cal"].nda[final_mask]), - "trapTmax_cal": lgdo.Array(dsp_data["trapTmax"].nda[final_mask] * ecal_pars), + "trapTmax_cal": lgdo.Array( + dsp_data["trapTmax"].nda[final_mask] * ecal_pars + ), "peak": lgdo.Array(np.full(len(np.where(final_mask)[0]), int(peak))), } ) @@ -81,11 +83,17 @@ def get_out_data( if __name__ == "__main__": argparser = argparse.ArgumentParser() argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) - argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) - argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) + argparser.add_argument( + "--tcm_filelist", help="tcm_filelist", type=str, required=False + ) + argparser.add_argument( + "--pulser_file", help="pulser_file", type=str, required=False + ) argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) - argparser.add_argument("--raw_cal", help="raw_cal", type=str, nargs="*", required=True) + argparser.add_argument( + "--raw_cal", help="raw_cal", type=str, nargs="*", required=True + ) argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument("--configs", help="configs", type=str, required=True) @@ -160,16 +168,23 @@ def get_out_data( if lh5_path[-1] != "/": lh5_path += "/" - raw_fields = [field.replace(lh5_path, "") for field in lh5.ls(raw_files[0], lh5_path)] + raw_fields = [ + field.replace(lh5_path, "") for field in lh5.ls(raw_files[0], lh5_path) + ] - tb = sto.read(lh5_path, raw_files, field_mask=["daqenergy", "t_sat_lo", "timestamp"])[0] + tb = sto.read( + lh5_path, raw_files, field_mask=["daqenergy", "t_sat_lo", "timestamp"] + )[0] discharges = tb["t_sat_lo"].nda > 0 discharge_timestamps = np.where(tb["timestamp"].nda[discharges])[0] is_recovering = np.full(len(tb), False, dtype=bool) for tstamp in discharge_timestamps: is_recovering = is_recovering | np.where( - (((tb["timestamp"].nda - tstamp) < 0.01) & ((tb["timestamp"].nda - tstamp) > 0)), + ( + ((tb["timestamp"].nda - tstamp) < 0.01) + & ((tb["timestamp"].nda - tstamp) > 0) + ), True, False, ) @@ -190,7 +205,9 @@ def get_out_data( masks[peak] = np.where(e_mask & (~is_recovering))[0] log.debug(f"{len(masks[peak])} events found in energy range for {peak}") - input_data = sto.read(f"{lh5_path}", raw_files, n_rows=10000, idx=np.where(~mask)[0])[0] + input_data = sto.read( + f"{lh5_path}", raw_files, n_rows=10000, idx=np.where(~mask)[0] + )[0] if isinstance(dsp_config, str): dsp_config = Props.read_from(dsp_config) @@ -230,7 +247,9 @@ def get_out_data( n_rows_to_read_i = bisect_left(peak_dict["idxs"][0], n_rows_i) # now split idx into idx_i and the remainder idx_i = (peak_dict["idxs"][0][:n_rows_to_read_i],) - peak_dict["idxs"] = (peak_dict["idxs"][0][n_rows_to_read_i:] - n_rows_i,) + peak_dict["idxs"] = ( + peak_dict["idxs"][0][n_rows_to_read_i:] - n_rows_i, + ) if len(idx_i[0]) > 0: peak_dict["obj_buf"], n_rows_read_i = sto.read( lh5_path, @@ -246,12 +265,17 @@ def get_out_data( peak_dict["obj_buf_start"] += n_rows_read_i if peak_dict["n_rows_read"] >= 10000 or file == raw_files[-1]: if "e_lower_lim" not in peak_dict: - tb_out = run_one_dsp(peak_dict["obj_buf"], dsp_config, db_dict=db_dict) + tb_out = run_one_dsp( + peak_dict["obj_buf"], dsp_config, db_dict=db_dict + ) energy = tb_out[energy_parameter].nda init_bin_width = ( 2 - * (np.nanpercentile(energy, 75) - np.nanpercentile(energy, 25)) + * ( + np.nanpercentile(energy, 75) + - np.nanpercentile(energy, 25) + ) * len(energy) ** (-1 / 3) ) @@ 
-285,10 +309,14 @@ def get_out_data( log.debug("Fit failed, using max guess") rough_adc_to_kev = peak / peak_loc e_lower_lim = ( - peak_loc - (1.5 * peak_dict["kev_width"][0]) / rough_adc_to_kev + peak_loc + - (1.5 * peak_dict["kev_width"][0]) + / rough_adc_to_kev ) e_upper_lim = ( - peak_loc + (1.5 * peak_dict["kev_width"][1]) / rough_adc_to_kev + peak_loc + + (1.5 * peak_dict["kev_width"][1]) + / rough_adc_to_kev ) hist, bins, var = pgh.get_hist( energy, @@ -298,8 +326,12 @@ def get_out_data( mu = pgh.get_bin_centers(bins)[np.nanargmax(hist)] updated_adc_to_kev = peak / mu - e_lower_lim = mu - (peak_dict["kev_width"][0]) / updated_adc_to_kev - e_upper_lim = mu + (peak_dict["kev_width"][1]) / updated_adc_to_kev + e_lower_lim = ( + mu - (peak_dict["kev_width"][0]) / updated_adc_to_kev + ) + e_upper_lim = ( + mu + (peak_dict["kev_width"][1]) / updated_adc_to_kev + ) log.info( f"{peak}: lower lim is :{e_lower_lim}, upper lim is {e_upper_lim}" ) @@ -319,13 +351,23 @@ def get_out_data( final_cut_field=final_cut_field, energy_param=energy_parameter, ) - sto.write(out_tbl, name=lh5_path, lh5_file=temp_output, wo_mode="a") + sto.write( + out_tbl, + name=lh5_path, + lh5_file=temp_output, + wo_mode="a", + ) peak_dict["obj_buf"] = None peak_dict["obj_buf_start"] = 0 peak_dict["n_events"] = n_wfs - log.debug(f'found {peak_dict["n_events"]} events for {peak}') + log.debug( + f'found {peak_dict["n_events"]} events for {peak}' + ) else: - if peak_dict["obj_buf"] is not None and len(peak_dict["obj_buf"]) > 0: + if ( + peak_dict["obj_buf"] is not None + and len(peak_dict["obj_buf"]) > 0 + ): tb_out = run_one_dsp( peak_dict["obj_buf"], dsp_config, db_dict=db_dict ) @@ -343,14 +385,21 @@ def get_out_data( ) peak_dict["n_events"] += n_wfs sto.write( - out_tbl, name=lh5_path, lh5_file=temp_output, wo_mode="a" + out_tbl, + name=lh5_path, + lh5_file=temp_output, + wo_mode="a", ) peak_dict["obj_buf"] = None peak_dict["obj_buf_start"] = 0 - log.debug(f'found {peak_dict["n_events"]} events for {peak}') + log.debug( + f'found {peak_dict["n_events"]} events for {peak}' + ) if peak_dict["n_events"] >= n_events: peak_dict["idxs"] = None - log.debug(f"{peak} has reached the required number of events") + log.debug( + f"{peak} has reached the required number of events" + ) else: Path(temp_output).touch() diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py index 9c5d5ff..7e843e8 100644 --- a/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py @@ -60,7 +60,9 @@ energies = sto.read(f"{channel}/raw/daqenergy", raw_files)[0] idxs = np.where(energies.nda == 0)[0] - tb_data = sto.read(f"{channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs)[0] + tb_data = sto.read( + f"{channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs + )[0] t1 = time.time() log.info(f"Time to open raw files {t1-t0:.2f} s, n. 
baselines {len(tb_data)}") @@ -81,7 +83,9 @@ tb_data, dsp_config, db_dict.copy(), opt_dict, channel, display=1 ) else: - out_dict = pno.noise_optimization(raw_files, dsp_config, db_dict.copy(), opt_dict, channel) + out_dict = pno.noise_optimization( + raw_files, dsp_config, db_dict.copy(), opt_dict, channel + ) t2 = time.time() log.info(f"Optimiser finished in {(t2-t0)/60} minutes") diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py index 552dd3e..1ca084b 100644 --- a/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py +++ b/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py @@ -28,7 +28,9 @@ argparser.add_argument("--pulser_file", help="pulser file", type=str, required=False) argparser.add_argument("--raw_files", help="input files", nargs="*", type=str) -argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str, required=False) +argparser.add_argument( + "--tcm_files", help="tcm_files", nargs="*", type=str, required=False +) args = argparser.parse_args() sto = lh5.LH5Store() @@ -50,7 +52,10 @@ if kwarg_dict["run_tau"] is True: dsp_config = Props.read_from(channel_dict) kwarg_dict.pop("run_tau") - if isinstance(args.raw_files, list) and args.raw_files[0].split(".")[-1] == "filelist": + if ( + isinstance(args.raw_files, list) + and args.raw_files[0].split(".")[-1] == "filelist" + ): input_file = args.raw_files[0] with Path(input_file).open() as f: input_file = f.read().splitlines() @@ -83,11 +88,16 @@ is_recovering = np.full(len(data), False, dtype=bool) for tstamp in discharge_timestamps: is_recovering = is_recovering | np.where( - (((data["timestamp"] - tstamp) < 0.01) & ((data["timestamp"] - tstamp) > 0)), + ( + ((data["timestamp"] - tstamp) < 0.01) + & ((data["timestamp"] - tstamp) > 0) + ), True, False, ) - cuts = np.where((data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering))[0] + cuts = np.where( + (data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering) + )[0] tb_data = sto.read( f"{channel}/raw", diff --git a/workflow/src/legenddataflow/scripts/pars_hit_aoe.py b/workflow/src/legenddataflow/scripts/pars_hit_aoe.py index 40ea3c3..7e13ed8 100644 --- a/workflow/src/legenddataflow/scripts/pars_hit_aoe.py +++ b/workflow/src/legenddataflow/scripts/pars_hit_aoe.py @@ -98,7 +98,12 @@ def aoe_calibration( aoe.calibrate(data, "AoE_Uncorr") log.info("Calibrated A/E") - return cal_dicts, get_results_dict(aoe), fill_plot_dict(aoe, data, plot_options), aoe + return ( + cal_dicts, + get_results_dict(aoe), + fill_plot_dict(aoe, data, plot_options), + aoe, + ) argparser = argparse.ArgumentParser() @@ -151,7 +156,9 @@ def aoe_calibration( pdf = eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else aoe_peak - sigma_func = eval(kwarg_dict.pop("sigma_func")) if "sigma_func" in kwarg_dict else SigmaFit + sigma_func = ( + eval(kwarg_dict.pop("sigma_func")) if "sigma_func" in kwarg_dict else SigmaFit + ) mean_func = eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else Pol1 diff --git a/workflow/src/legenddataflow/scripts/pars_hit_ecal.py b/workflow/src/legenddataflow/scripts/pars_hit_ecal.py index 9a2f3c5..725fc84 100644 --- a/workflow/src/legenddataflow/scripts/pars_hit_ecal.py +++ b/workflow/src/legenddataflow/scripts/pars_hit_ecal.py @@ -48,7 +48,9 @@ def plot_2614_timemap( plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize - selection = data.query(f"{cal_energy_param}>2560&{cal_energy_param}<2660&{selection_string}") + 
selection = data.query( + f"{cal_energy_param}>2560&{cal_energy_param}<2660&{selection_string}" + ) fig = plt.figure() if len(selection) == 0: @@ -68,7 +70,9 @@ def plot_2614_timemap( ) ticks, labels = plt.xticks() - plt.xlabel(f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}") + plt.xlabel( + f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}" + ) plt.ylabel("Energy(keV)") plt.ylim([erange[0], erange[1]]) @@ -120,7 +124,9 @@ def plot_pulser_timemap( ) plt.ylim([mean - n_spread * spread, mean + n_spread * spread]) ticks, labels = plt.xticks() - plt.xlabel(f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}") + plt.xlabel( + f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}" + ) plt.ylabel("Energy(keV)") plt.xticks( @@ -298,7 +304,9 @@ def plot_baseline_timemap( ) ticks, labels = plt.xticks() - plt.xlabel(f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}") + plt.xlabel( + f"Time starting : {datetime.utcfromtimestamp(ticks[0]).strftime('%d/%m/%y %H:%M')}" + ) plt.ylabel("Baseline Value") plt.ylim([mean - n_spread * spread, mean + n_spread * spread]) @@ -351,7 +359,9 @@ def baseline_tracking_plots(files, lh5_path, plot_options=None): if plot_options is None: plot_options = {} plot_dict = {} - data = lh5.read_as(lh5_path, files, "pd", field_mask=["bl_mean", "baseline", "timestamp"]) + data = lh5.read_as( + lh5_path, files, "pd", field_mask=["bl_mean", "baseline", "timestamp"] + ) for key, item in plot_options.items(): if item["options"] is not None: plot_dict[key] = item["function"](data, **item["options"]) @@ -402,13 +412,21 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): dic.pop("covariance") return { - "total_fep": len(data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624")), - "total_dep": len(data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597")), + "total_fep": len( + data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624") + ), + "total_dep": len( + data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597") + ), "pass_fep": len( - data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624&{selection_string}") + data.query( + f"{cal_energy_param}>2604&{cal_energy_param}<2624&{selection_string}" + ) ), "pass_dep": len( - data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597&{selection_string}") + data.query( + f"{cal_energy_param}>1587&{cal_energy_param}<1597&{selection_string}" + ) ), "eres_linear": fwhm_linear, "eres_quadratic": fwhm_quad, @@ -422,8 +440,12 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): if __name__ == "__main__": argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="filelist", nargs="*", type=str) - argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) - argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) + argparser.add_argument( + "--tcm_filelist", help="tcm_filelist", type=str, required=False + ) + argparser.add_argument( + "--pulser_file", help="pulser_file", type=str, required=False + ) argparser.add_argument("--ctc_dict", help="ctc_dict", nargs="*") argparser.add_argument("--in_hit_dict", help="in_hit_dict", required=False) @@ -498,7 +520,12 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): files, f"{channel}/dsp", hit_dict, - params=[*kwarg_dict["energy_params"], kwarg_dict["cut_param"], 
"timestamp", "trapTmax"], + params=[ + *kwarg_dict["energy_params"], + kwarg_dict["cut_param"], + "timestamp", + "trapTmax", + ], threshold=kwarg_dict["threshold"], return_selection_mask=True, cal_energy_param="trapTmax", @@ -535,7 +562,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): glines = [pk_par[0] for pk_par in pk_pars] if "cal_energy_params" not in kwarg_dict: - cal_energy_params = [energy_param + "_cal" for energy_param in kwarg_dict["energy_params"]] + cal_energy_params = [ + energy_param + "_cal" for energy_param in kwarg_dict["energy_params"] + ] else: cal_energy_params = kwarg_dict["cal_energy_params"] @@ -545,7 +574,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): plot_dict = {} full_object_dict = {} - for energy_param, cal_energy_param in zip(kwarg_dict["energy_params"], cal_energy_params): + for energy_param, cal_energy_param in zip( + kwarg_dict["energy_params"], cal_energy_params + ): e_uncal = data.query(selection_string)[energy_param].to_numpy() hist, bins, bar = pgh.get_hist( @@ -625,7 +656,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): full_object_dict[cal_energy_param], data, cal_energy_param, selection_string ) - hit_dict.update({cal_energy_param: full_object_dict[cal_energy_param].gen_pars_dict()}) + hit_dict.update( + {cal_energy_param: full_object_dict[cal_energy_param].gen_pars_dict()} + ) if "ctc" in cal_energy_param: no_ctc_dict = full_object_dict[cal_energy_param].gen_pars_dict() no_ctc_dict["expression"] = no_ctc_dict["expression"].replace("_ctc", "") @@ -641,15 +674,15 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): if args.plot_path: param_plot_dict = {} if ~np.isnan(full_object_dict[cal_energy_param].pars).all(): - param_plot_dict["fwhm_fit"] = full_object_dict[cal_energy_param].plot_eres_fit( - e_uncal - ) - param_plot_dict["cal_fit"] = full_object_dict[cal_energy_param].plot_cal_fit( - e_uncal - ) - param_plot_dict["peak_fits"] = full_object_dict[cal_energy_param].plot_fits( - e_uncal - ) + param_plot_dict["fwhm_fit"] = full_object_dict[ + cal_energy_param + ].plot_eres_fit(e_uncal) + param_plot_dict["cal_fit"] = full_object_dict[ + cal_energy_param + ].plot_cal_fit(e_uncal) + param_plot_dict["peak_fits"] = full_object_dict[ + cal_energy_param + ].plot_fits(e_uncal) if "plot_options" in kwarg_dict: for key, item in kwarg_dict["plot_options"].items(): @@ -738,7 +771,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): pkl.dump(total_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) # save output dictionary - output_dict = convert_dict_np_to_float({"pars": hit_dict, "results": {"ecal": results_dict}}) + output_dict = convert_dict_np_to_float( + {"pars": hit_dict, "results": {"ecal": results_dict}} + ) Props.write_to(args.save_path, output_dict) # save calibration objects diff --git a/workflow/src/legenddataflow/scripts/pars_hit_qc.py b/workflow/src/legenddataflow/scripts/pars_hit_qc.py index c83dff7..5e6a378 100644 --- a/workflow/src/legenddataflow/scripts/pars_hit_qc.py +++ b/workflow/src/legenddataflow/scripts/pars_hit_qc.py @@ -33,8 +33,12 @@ argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) - argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) - argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) + argparser.add_argument( + 
"--tcm_filelist", help="tcm_filelist", type=str, required=False + ) + argparser.add_argument( + "--pulser_file", help="pulser_file", type=str, required=False + ) argparser.add_argument( "--overwrite_files", help="overwrite_files", @@ -93,7 +97,10 @@ kwarg_dict_fft = kwarg_dict["fft_fields"] if len(fft_files) > 0: fft_fields = get_keys( - [key.replace(f"{channel}/dsp/", "") for key in ls(fft_files[0], f"{channel}/dsp/")], + [ + key.replace(f"{channel}/dsp/", "") + for key in ls(fft_files[0], f"{channel}/dsp/") + ], kwarg_dict_fft["cut_parameters"], ) @@ -132,15 +139,21 @@ hit_dict_fft.update(cut_dict) plot_dict_fft.update(cut_plots) - log.debug(f"{name} calculated cut_dict is: {json.dumps(cut_dict, indent=2)}") + log.debug( + f"{name} calculated cut_dict is: {json.dumps(cut_dict, indent=2)}" + ) ct_mask = np.full(len(cut_data), True, dtype=bool) for outname, info in cut_dict.items(): # convert to pandas eval exp = info["expression"] for key in info.get("parameters", None): - exp = re.sub(f"(? 0)), + ( + ((data["timestamp"] - tstamp) < 0.01) + & ((data["timestamp"] - tstamp) > 0) + ), True, False, ) @@ -213,7 +235,9 @@ rng = np.random.default_rng() mask = np.full(len(data.query("~is_pulser & ~is_recovering")), False, dtype=bool) - mask[rng.choice(len(data.query("~is_pulser & ~is_recovering")), 4000, replace=False)] = True + mask[ + rng.choice(len(data.query("~is_pulser & ~is_recovering")), 4000, replace=False) + ] = True if "initial_cal_cuts" in kwarg_dict: init_cal = kwarg_dict["initial_cal_cuts"] diff --git a/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py b/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py index 4ad0980..8aad849 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py @@ -57,7 +57,9 @@ def get_results_dict(aoe_class): "high_cut": aoe_class.high_cut_val, "low_side_sfs": aoe_class.low_side_sfs.to_dict("index"), "2_side_sfs": aoe_class.two_side_sfs.to_dict("index"), - "low_side_sfs_by_run": aoe_class.low_side_sfs_by_run[tstamp].to_dict("index"), + "low_side_sfs_by_run": aoe_class.low_side_sfs_by_run[tstamp].to_dict( + "index" + ), "2_side_sfs_by_run": aoe_class.two_side_sfs_by_run[tstamp].to_dict("index"), } return result_dict @@ -123,7 +125,12 @@ def aoe_calibration( ) aoe.calibrate(data, "AoE_Uncorr") log.info("Calibrated A/E") - return cal_dicts, get_results_dict(aoe), fill_plot_dict(aoe, data, plot_options), aoe + return ( + cal_dicts, + get_results_dict(aoe), + fill_plot_dict(aoe, data, plot_options), + aoe, + ) def run_aoe_calibration( @@ -139,14 +146,13 @@ def run_aoe_calibration( # gen_plots=True, ): configs = LegendMetadata(path=configs) - channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"]["pars_pht_aoecal"][ - "inputs" - ]["par_pht_aoecal_config"][channel] + channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"][ + "pars_pht_aoecal" + ]["inputs"]["par_pht_aoecal_config"][channel] kwarg_dict = Props.read_from(channel_dict) if kwarg_dict.pop("run_aoe") is True: - kwarg_dict.pop("pulser_multiplicity_threshold") kwarg_dict.pop("threshold") if "plot_options" in kwarg_dict: @@ -155,9 +161,15 @@ def run_aoe_calibration( pdf = eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else aoe_peak - mean_func = eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else Pol1 + mean_func = ( + eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else Pol1 + ) - sigma_func = eval(kwarg_dict.pop("sigma_func")) if "sigma_func" in kwarg_dict 
else SigmaFit + sigma_func = ( + eval(kwarg_dict.pop("sigma_func")) + if "sigma_func" in kwarg_dict + else SigmaFit + ) if "dt_cut" in kwarg_dict and kwarg_dict["dt_cut"] is not None: cut_dict = kwarg_dict["dt_cut"]["cut"] @@ -212,9 +224,9 @@ def eres_func(x): aoe_obj.pdf = aoe_obj.pdf.name # need to change eres func as can't pickle lambdas try: - aoe_obj.eres_func = results_dicts[next(iter(results_dicts))]["partition_ecal"][ - kwarg_dict["cal_energy_param"] - ]["eres_linear"] + aoe_obj.eres_func = results_dicts[next(iter(results_dicts))][ + "partition_ecal" + ][kwarg_dict["cal_energy_param"]]["eres_linear"] except KeyError: aoe_obj.eres_func = {} else: @@ -230,7 +242,9 @@ def eres_func(x): for tstamp, object_dict in object_dicts.items(): out_object_dicts[tstamp] = dict(**object_dict, aoe=aoe_obj) - common_dict = aoe_plot_dict.pop("common") if "common" in list(aoe_plot_dict) else None + common_dict = ( + aoe_plot_dict.pop("common") if "common" in list(aoe_plot_dict) else None + ) out_plot_dicts = {} for tstamp, plot_dict in plot_dicts.items(): if "common" in list(plot_dict) and common_dict is not None: @@ -244,18 +258,25 @@ def eres_func(x): if __name__ == "__main__": - argparser = argparse.ArgumentParser() - argparser.add_argument("--input_files", help="files", type=str, nargs="*", required=True) + argparser.add_argument( + "--input_files", help="files", type=str, nargs="*", required=True + ) argparser.add_argument( "--pulser_files", help="pulser_file", nargs="*", type=str, required=False ) argparser.add_argument( "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False ) - argparser.add_argument("--ecal_file", help="ecal_file", type=str, nargs="*", required=True) - argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) + argparser.add_argument( + "--ecal_file", help="ecal_file", type=str, nargs="*", required=True + ) + argparser.add_argument( + "--eres_file", help="eres_file", type=str, nargs="*", required=True + ) + argparser.add_argument( + "--inplots", help="eres_file", type=str, nargs="*", required=True + ) argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata", type=str) @@ -265,7 +286,9 @@ def eres_func(x): argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) + argparser.add_argument( + "--plot_file", help="plot_file", type=str, nargs="*", required=False + ) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--aoe_results", help="aoe_results", nargs="*", type=str) @@ -383,7 +406,10 @@ def eres_func(x): for tstamp in cal_dict: if tstamp not in np.unique(data["run_timestamp"]): - row = {key: [False] if data.dtypes[key] == "bool" else [np.nan] for key in data} + row = { + key: [False] if data.dtypes[key] == "bool" else [np.nan] + for key in data + } row["run_timestamp"] = tstamp row = pd.DataFrame(row) data = pd.concat([data, row]) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_fast.py b/workflow/src/legenddataflow/scripts/pars_pht_fast.py index a807fa6..6dda1b7 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_fast.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_fast.py @@ -44,16 +44,24 @@ def 
run_splitter(files): if __name__ == "__main__": argparser = argparse.ArgumentParser() - argparser.add_argument("--input_files", help="files", type=str, nargs="*", required=True) + argparser.add_argument( + "--input_files", help="files", type=str, nargs="*", required=True + ) argparser.add_argument( "--pulser_files", help="pulser_file", nargs="*", type=str, required=False ) argparser.add_argument( "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False ) - argparser.add_argument("--ecal_file", help="ecal_file", type=str, nargs="*", required=True) - argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) + argparser.add_argument( + "--ecal_file", help="ecal_file", type=str, nargs="*", required=True + ) + argparser.add_argument( + "--eres_file", help="eres_file", type=str, nargs="*", required=True + ) + argparser.add_argument( + "--inplots", help="eres_file", type=str, nargs="*", required=True + ) argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -63,7 +71,9 @@ def run_splitter(files): argparser.add_argument("--metadata", help="metadata path", type=str, required=True) argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) + argparser.add_argument( + "--plot_file", help="plot_file", type=str, nargs="*", required=False + ) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) @@ -121,7 +131,9 @@ def run_splitter(files): final_dict[timestamp] = sorted(filelist) kwarg_dict = Props.read_from( - config_dict["pars_pht_partcal"]["inputs"]["pars_pht_partcal_config"][args.channel] + config_dict["pars_pht_partcal"]["inputs"]["pars_pht_partcal_config"][ + args.channel + ] ) aoe_kwarg_dict = Props.read_from( config_dict["pars_pht_aoecal"]["inputs"]["par_pht_aoecal_config"][args.channel] @@ -200,7 +212,9 @@ def run_splitter(files): for tstamp in cal_dict: if tstamp not in np.unique(data["run_timestamp"]): - row = {key: [False] if data.dtypes[key] == "bool" else [np.nan] for key in data} + row = { + key: [False] if data.dtypes[key] == "bool" else [np.nan] for key in data + } row["run_timestamp"] = tstamp row = pd.DataFrame(row) data = pd.concat([data, row]) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_lqcal.py b/workflow/src/legenddataflow/scripts/pars_pht_lqcal.py index a6a231a..78c8c6e 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_lqcal.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_lqcal.py @@ -153,14 +153,13 @@ def run_lq_calibration( # gen_plots=True, ): configs = LegendMetadata(path=configs) - channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"]["pars_pht_lqcal"][ - "inputs" - ]["lqcal_config"][channel] + channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"][ + "pars_pht_lqcal" + ]["inputs"]["lqcal_config"][channel] kwarg_dict = Props.read_from(channel_dict) if kwarg_dict.pop("run_lq") is True: - if "plot_options" in kwarg_dict: for field, item in kwarg_dict["plot_options"].items(): kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) @@ -208,9 +207,9 @@ def eres_func(x): ) # need to change eres func as can't pickle lambdas try: - lq_obj.eres_func = 
results_dicts[next(iter(results_dicts))]["partition_ecal"][ - kwarg_dict["cal_energy_param"] - ]["eres_linear"] + lq_obj.eres_func = results_dicts[next(iter(results_dicts))][ + "partition_ecal" + ][kwarg_dict["cal_energy_param"]]["eres_linear"] except KeyError: lq_obj.eres_func = {} else: @@ -241,16 +240,24 @@ def eres_func(x): if __name__ == "__main__": argparser = argparse.ArgumentParser() - argparser.add_argument("--input_files", help="files", type=str, nargs="*", required=True) + argparser.add_argument( + "--input_files", help="files", type=str, nargs="*", required=True + ) argparser.add_argument( "--pulser_files", help="pulser_file", type=str, nargs="*", required=False ) argparser.add_argument( "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False ) - argparser.add_argument("--ecal_file", help="ecal_file", type=str, nargs="*", required=True) - argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) + argparser.add_argument( + "--ecal_file", help="ecal_file", type=str, nargs="*", required=True + ) + argparser.add_argument( + "--eres_file", help="eres_file", type=str, nargs="*", required=True + ) + argparser.add_argument( + "--inplots", help="eres_file", type=str, nargs="*", required=True + ) argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) @@ -260,7 +267,9 @@ def eres_func(x): argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--plot_file", help="plot_file", type=str, nargs="*", required=False) + argparser.add_argument( + "--plot_file", help="plot_file", type=str, nargs="*", required=False + ) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--lq_results", help="lq_results", nargs="*", type=str) @@ -370,7 +379,10 @@ def eres_func(x): for tstamp in cal_dict: if tstamp not in np.unique(data["run_timestamp"]): - row = {key: [False] if data.dtypes[key] == "bool" else [np.nan] for key in data} + row = { + key: [False] if data.dtypes[key] == "bool" else [np.nan] + for key in data + } row["run_timestamp"] = tstamp row = pd.DataFrame(row) data = pd.concat([data, row]) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_partcal.py b/workflow/src/legenddataflow/scripts/pars_pht_partcal.py index 7bd8f66..bd2d93f 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_partcal.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_partcal.py @@ -113,13 +113,21 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): dic.pop("covariance") out_dict = { - "total_fep": len(data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624")), - "total_dep": len(data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597")), + "total_fep": len( + data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624") + ), + "total_dep": len( + data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597") + ), "pass_fep": len( - data.query(f"{cal_energy_param}>2604&{cal_energy_param}<2624&{selection_string}") + data.query( + f"{cal_energy_param}>2604&{cal_energy_param}<2624&{selection_string}" + ) ), "pass_dep": len( - data.query(f"{cal_energy_param}>1587&{cal_energy_param}<1597&{selection_string}") + data.query( + 
f"{cal_energy_param}>1587&{cal_energy_param}<1597&{selection_string}" + ) ), "eres_linear": fwhm_linear, "eres_quadratic": fwhm_quad, @@ -128,7 +136,9 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): "peak_param": results_dict["peak_param"], } if "calibration_parameters" in results_dict: - out_dict["calibration_parameters"] = results_dict["calibration_parameters"].to_dict() + out_dict["calibration_parameters"] = results_dict[ + "calibration_parameters" + ].to_dict() out_dict["calibration_uncertainty"] = results_dict[ "calibration_uncertainties" ].to_dict() @@ -149,13 +159,12 @@ def calibrate_partition( datatype, gen_plots=True, ): - det_status = chmap[channel]["analysis"]["usability"] configs = LegendMetadata(path=configs) - channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"]["pars_pht_partcal"][ - "inputs" - ]["pars_pht_partcal_config"][channel] + channel_dict = configs.on(timestamp, system=datatype)["snakemake_rules"][ + "pars_pht_partcal" + ]["inputs"]["pars_pht_partcal_config"][channel] kwarg_dict = Props.read_from(channel_dict) @@ -201,7 +210,9 @@ def calibrate_partition( glines = [pk_par[0] for pk_par in pk_pars] if "cal_energy_params" not in kwarg_dict: - cal_energy_params = [energy_param + "_cal" for energy_param in kwarg_dict["energy_params"]] + cal_energy_params = [ + energy_param + "_cal" for energy_param in kwarg_dict["energy_params"] + ] else: cal_energy_params = kwarg_dict["cal_energy_params"] @@ -211,14 +222,17 @@ def calibrate_partition( partcal_plot_dict = {} full_object_dict = {} - for energy_param, cal_energy_param in zip(kwarg_dict["energy_params"], cal_energy_params): + for energy_param, cal_energy_param in zip( + kwarg_dict["energy_params"], cal_energy_params + ): energy = data.query(selection_string)[energy_param].to_numpy() full_object_dict[cal_energy_param] = HPGeCalibration( energy_param, glines, 1, kwarg_dict.get("deg", 0), - debug_mode=kwarg_dict.get("debug_mode", False) | args.debug, # , fixed={1: 1} + debug_mode=kwarg_dict.get("debug_mode", False) + | args.debug, # , fixed={1: 1} ) full_object_dict[cal_energy_param].hpge_get_energy_peaks( energy, @@ -249,9 +263,9 @@ def calibrate_partition( if csqr[0] / csqr[1] < 100: allowed_p_val = ( 0.9 - * full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks"][ - "peak_parameters" - ][2614.511]["p_value"] + * full_object_dict[cal_energy_param].results[ + "hpge_fit_energy_peaks" + ]["peak_parameters"][2614.511]["p_value"] ) full_object_dict[cal_energy_param] = HPGeCalibration( @@ -296,11 +310,14 @@ def calibrate_partition( full_object_dict[cal_energy_param], data, cal_energy_param, selection_string ) cal_dicts = update_cal_dicts( - cal_dicts, {cal_energy_param: full_object_dict[cal_energy_param].gen_pars_dict()} + cal_dicts, + {cal_energy_param: full_object_dict[cal_energy_param].gen_pars_dict()}, ) if "ctc" in cal_energy_param: no_ctc_dict = full_object_dict[cal_energy_param].gen_pars_dict() - no_ctc_dict["expression"] = no_ctc_dict["expression"].replace("ctc", "noctc") + no_ctc_dict["expression"] = no_ctc_dict["expression"].replace( + "ctc", "noctc" + ) cal_dicts = update_cal_dicts( cal_dicts, {cal_energy_param.replace("ctc", "noctc"): no_ctc_dict} @@ -318,42 +335,42 @@ def calibrate_partition( if gen_plots is True: param_plot_dict = {} if ~np.isnan(full_object_dict[cal_energy_param].pars).all(): - param_plot_dict["fwhm_fit"] = full_object_dict[cal_energy_param].plot_eres_fit( - energy - ) - param_plot_dict["cal_fit"] = 
full_object_dict[cal_energy_param].plot_cal_fit( - energy - ) + param_plot_dict["fwhm_fit"] = full_object_dict[ + cal_energy_param + ].plot_eres_fit(energy) + param_plot_dict["cal_fit"] = full_object_dict[ + cal_energy_param + ].plot_cal_fit(energy) if det_status == "on": param_plot_dict["cal_fit_with_errors"] = full_object_dict[ cal_energy_param ].plot_cal_fit_with_errors(energy) if ( len( - full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks"][ - "peak_parameters" - ] + full_object_dict[cal_energy_param].results[ + "hpge_fit_energy_peaks" + ]["peak_parameters"] ) < 17 ): - param_plot_dict["peak_fits"] = full_object_dict[cal_energy_param].plot_fits( - energy, ncols=4, nrows=4 - ) + param_plot_dict["peak_fits"] = full_object_dict[ + cal_energy_param + ].plot_fits(energy, ncols=4, nrows=4) elif ( len( - full_object_dict[cal_energy_param].results["hpge_fit_energy_peaks"][ - "peak_parameters" - ] + full_object_dict[cal_energy_param].results[ + "hpge_fit_energy_peaks" + ]["peak_parameters"] ) < 26 ): - param_plot_dict["peak_fits"] = full_object_dict[cal_energy_param].plot_fits( - energy, ncols=5, nrows=5 - ) + param_plot_dict["peak_fits"] = full_object_dict[ + cal_energy_param + ].plot_fits(energy, ncols=5, nrows=5) else: - param_plot_dict["peak_fits"] = full_object_dict[cal_energy_param].plot_fits( - energy, ncols=6, nrows=5 - ) + param_plot_dict["peak_fits"] = full_object_dict[ + cal_energy_param + ].plot_fits(energy, ncols=6, nrows=5) if "plot_options" in kwarg_dict: for key, item in kwarg_dict["plot_options"].items(): @@ -389,7 +406,9 @@ def calibrate_partition( for tstamp, object_dict in object_dicts.items(): out_object_dicts[tstamp] = dict(**object_dict, partition_ecal=full_object_dict) - common_dict = partcal_plot_dict.pop("common") if "common" in list(partcal_plot_dict) else None + common_dict = ( + partcal_plot_dict.pop("common") if "common" in list(partcal_plot_dict) else None + ) out_plot_dicts = {} for tstamp, plot_dict in plot_dicts.items(): if "common" in list(plot_dict) and common_dict is not None: @@ -404,16 +423,24 @@ def calibrate_partition( if __name__ == "__main__": argparser = argparse.ArgumentParser() - argparser.add_argument("--input_files", help="files", type=str, nargs="*", required=True) + argparser.add_argument( + "--input_files", help="files", type=str, nargs="*", required=True + ) argparser.add_argument( "--pulser_files", help="pulser_file", nargs="*", type=str, required=False ) argparser.add_argument( "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False ) - argparser.add_argument("--ecal_file", help="ecal_file", type=str, nargs="*", required=True) - argparser.add_argument("--eres_file", help="eres_file", type=str, nargs="*", required=True) - argparser.add_argument("--inplots", help="eres_file", type=str, nargs="*", required=True) + argparser.add_argument( + "--ecal_file", help="ecal_file", type=str, nargs="*", required=True + ) + argparser.add_argument( + "--eres_file", help="eres_file", type=str, nargs="*", required=True + ) + argparser.add_argument( + "--inplots", help="eres_file", type=str, nargs="*", required=True + ) argparser.add_argument("--timestamp", help="Datatype", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -423,7 +450,9 @@ def calibrate_partition( argparser.add_argument("--metadata", help="metadata path", type=str, required=True) argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_file", help="plot_file", type=str, 
nargs="*", required=False) + argparser.add_argument( + "--plot_file", help="plot_file", type=str, nargs="*", required=False + ) argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) @@ -525,7 +554,9 @@ def calibrate_partition( for tstamp in cal_dict: if tstamp not in np.unique(data["run_timestamp"]): - row = {key: [False] if data.dtypes[key] == "bool" else [np.nan] for key in data} + row = { + key: [False] if data.dtypes[key] == "bool" else [np.nan] for key in data + } row["run_timestamp"] = tstamp row = pd.DataFrame(row) data = pd.concat([data, row]) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_qc.py b/workflow/src/legenddataflow/scripts/pars_pht_qc.py index e1cf4dd..feee4e5 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_qc.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_qc.py @@ -51,7 +51,9 @@ argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) + argparser.add_argument( + "--plot_path", help="plot_path", type=str, nargs="*", required=False + ) argparser.add_argument( "--save_path", help="save_path", @@ -156,15 +158,21 @@ hit_dict_fft.update(cut_dict) plot_dict_fft.update(cut_plots) - log.debug(f"{name} calculated cut_dict is: {json.dumps(cut_dict, indent=2)}") + log.debug( + f"{name} calculated cut_dict is: {json.dumps(cut_dict, indent=2)}" + ) ct_mask = np.full(len(cut_data), True, dtype=bool) for outname, info in cut_dict.items(): # convert to pandas eval exp = info["expression"] for key in info.get("parameters", None): - exp = re.sub(f"(? 
0)), + ( + ((data["timestamp"] - tstamp) <= 0.01) + & ((data["timestamp"] - tstamp) > 0) + ), True, False, ) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py b/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py index b48211f..71167df 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py +++ b/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py @@ -39,7 +39,9 @@ argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--plot_path", help="plot_path", type=str, nargs="*", required=False) + argparser.add_argument( + "--plot_path", help="plot_path", type=str, nargs="*", required=False + ) argparser.add_argument( "--save_path", help="save_path", @@ -75,7 +77,9 @@ else: run_files = sorted(np.unique(run_files)) phy_files += run_files - bls = sto.read("ch1027200/dsp/", run_files, field_mask=["wf_max", "bl_mean"])[0] + bls = sto.read( + "ch1027200/dsp/", run_files, field_mask=["wf_max", "bl_mean"] + )[0] puls = sto.read("ch1027201/dsp/", run_files, field_mask=["trapTmax"])[0] bl_idxs = ((bls["wf_max"].nda - bls["bl_mean"].nda) > 1000) & ( puls["trapTmax"].nda < 200 @@ -87,12 +91,17 @@ phy_files = sorted(np.unique(phy_files)) bls = sto.read("ch1027200/dsp/", phy_files, field_mask=["wf_max", "bl_mean"])[0] puls = sto.read("ch1027201/dsp/", phy_files, field_mask=["trapTmax"])[0] - bl_mask = ((bls["wf_max"].nda - bls["bl_mean"].nda) > 1000) & (puls["trapTmax"].nda < 200) + bl_mask = ((bls["wf_max"].nda - bls["bl_mean"].nda) > 1000) & ( + puls["trapTmax"].nda < 200 + ) kwarg_dict_fft = kwarg_dict["fft_fields"] cut_fields = get_keys( - [key.replace(f"{channel}/dsp/", "") for key in ls(phy_files[0], f"{channel}/dsp/")], + [ + key.replace(f"{channel}/dsp/", "") + for key in ls(phy_files[0], f"{channel}/dsp/") + ], kwarg_dict_fft["cut_parameters"], ) @@ -108,7 +117,10 @@ is_recovering = np.full(len(data), False, dtype=bool) for tstamp in discharge_timestamps: is_recovering = is_recovering | np.where( - (((data["timestamp"] - tstamp) < 0.01) & ((data["timestamp"] - tstamp) > 0)), + ( + ((data["timestamp"] - tstamp) < 0.01) + & ((data["timestamp"] - tstamp) > 0) + ), True, False, ) @@ -138,7 +150,9 @@ exp = info["expression"] for key in info.get("parameters", None): exp = re.sub(f"(? 
Date: Thu, 30 Jan 2025 17:10:15 +0100 Subject: [PATCH 073/101] fix a couple of problems with tests and setup GHA --- .github/dependabot.yml | 7 +++ .github/workflows/main.yml | 59 ++++++++++++++++++++++++ tests/test_util.py | 94 ++++++++++++-------------------------- 3 files changed, 94 insertions(+), 66 deletions(-) create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/main.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..f9ecf57 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,7 @@ +version: 2 +updates: + # Maintain dependencies for GitHub Actions + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..974d240 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,59 @@ +name: lgdo + +on: + workflow_dispatch: + pull_request: + push: + branches: + - main + - "releases/**" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + FORCE_COLOR: 3 + +jobs: + build-and-test: + name: Test lgdo with Python + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.11", "3.12"] + os: [ubuntu-latest, macos-13] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Get dependencies and install legend-dataflow + run: | + python -m pip install --upgrade pip wheel setuptools + python -m pip install --upgrade .[test] + - name: Run unit tests + run: | + python -m pytest + + test-coverage: + name: Calculate and upload test coverage + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 2 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Generate Report + run: | + python -m pip install --upgrade pip wheel setuptools + python -m pip install --upgrade .[test] + python -m pytest --cov=legenddataflow --cov-report=xml + - name: Upload Coverage to codecov.io + uses: codecov/codecov-action@v5 diff --git a/tests/test_util.py b/tests/test_util.py index c1654e0..38d8910 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,33 +1,25 @@ import json from pathlib import Path -from scripts.library import ( - CalibCatalog, +from legenddataflow import ( FileKey, - pars_catalog, - pars_key_resolve, + ParsKeyResolve, + patterns, subst_vars, - unix_time, -) -from scripts.library.patterns import get_pattern_tier_daq, get_pattern_tier_dsp -from scripts.library.utils import ( - par_dsp_path, - par_overwrite_path, - tier_dsp_path, - tier_path, + utils, ) testprod = Path(__file__).parent / "dummy_cycle" -with testprod.open() as r: +with (testprod / "config.json").open() as r: setup = json.load(r) subst_vars(setup, var_values={"_": str(testprod)}) setup = setup["setups"]["test"] def test_util(): - assert tier_path(setup) == str(testprod / "generated/tier") - assert unix_time("20230101T123456Z") == 1672572896.0 + assert utils.tier_path(setup) == str(testprod / "generated/tier") + assert utils.unix_time("20230101T123456Z") == 1672572896.0 def test_filekey(): @@ -44,13 +36,13 @@ def test_filekey(): ) assert key.name == "l200-p00-r000-cal-20230101T123456Z" assert ( - key.get_path_from_filekey(get_pattern_tier_dsp(setup))[0] - == f"{tier_dsp_path(setup)}/cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-tier_dsp.lh5" + 
key.get_path_from_filekey(patterns.get_pattern_tier(setup, "dsp"))[0] + == f"{utils.get_tier_path(setup, 'dsp')}/cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-tier_dsp.lh5" ) assert ( FileKey.get_filekey_from_pattern( - key.get_path_from_filekey(get_pattern_tier_dsp(setup))[0], - get_pattern_tier_dsp(setup), + key.get_path_from_filekey(patterns.get_pattern_tier(setup, "dsp"))[0], + utils.get_pattern_tier(setup, "dsp"), ).name == key.name ) @@ -59,29 +51,29 @@ def test_filekey(): def test_create_pars_keylist(): key1 = FileKey("l200", "p00", "r000", "cal", "20230101T123456Z") assert ( - pars_key_resolve.from_filekey(key1, {"cal": ["par_dsp"]}).valid_from + ParsKeyResolve.from_filekey(key1, {"cal": ["par_dsp"]}).valid_from == "20230101T123456Z" ) key2 = FileKey("l200", "p00", "r000", "cal", "20230102T123456Z") - assert pars_key_resolve.match_keys(key1, key2) == key1 + assert ParsKeyResolve.match_keys(key1, key2) == key1 key3 = FileKey("l200", "p00", "r000", "cal", "20230101T000000Z") - assert pars_key_resolve.match_keys(key1, key3) == key3 - assert pars_key_resolve.generate_par_keylist([key1, key2, key3]) == [key3] - pkey1 = pars_key_resolve.from_filekey(key1, {"cal": ["par_dsp"]}) - pkey2 = pars_key_resolve.from_filekey( - FileKey("l200", "p00", "r000", "lar", "20230102T123456Z"), {"lar": ["par_dsp"]} + assert ParsKeyResolve.match_keys(key1, key3) == key3 + assert ParsKeyResolve.generate_par_keylist([key1, key2, key3]) == [key3] + pkey1 = ParsKeyResolve.from_filekey(key1, {"cal": ["par_dsp"]}) + pkey2 = ParsKeyResolve.from_filekey( + FileKey("l200", "p00", "r000", "lar", "20230102T123456Z"), {"lar": "par_dsp"} ) assert pkey2.apply == [ - "lar/p00/r000/l200-p00-r000-lar-20230102T123456Z-par_dsp.json" + "lar/p00/r000/l200-p00-r000-lar-20230102T123456Z-par_dsp.yaml" ] - pars_key_resolve.match_entries(pkey1, pkey2) + ParsKeyResolve.match_entries(pkey1, pkey2) assert set(pkey2.apply) == { - "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", - "lar/p00/r000/l200-p00-r000-lar-20230102T123456Z-par_dsp.json", + "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.yaml", + "lar/p00/r000/l200-p00-r000-lar-20230102T123456Z-par_dsp.yaml", } keylist = sorted( - pars_key_resolve.get_keys("-*-*-*-cal", get_pattern_tier_daq(setup)), + ParsKeyResolve.get_keys("-*-*-*-cal", patterns.get_pattern_tier_daq(setup)), key=FileKey.get_unix_timestamp, ) assert keylist == [ @@ -89,7 +81,9 @@ def test_create_pars_keylist(): FileKey("l200", "p00", "r001", "cal", "20230202T004321Z"), ] - keylist += pars_key_resolve.get_keys("-*-*-*-lar", get_pattern_tier_daq(setup)) + keylist += ParsKeyResolve.get_keys( + "-*-*-*-lar", patterns.get_pattern_tier_daq(setup) + ) keylist = sorted(keylist, key=FileKey.get_unix_timestamp) assert keylist == [ FileKey("l200", "p00", "r000", "cal", "20230101T123456Z"), @@ -97,45 +91,13 @@ def test_create_pars_keylist(): FileKey("l200", "p00", "r001", "cal", "20230202T004321Z"), ] - pkeylist = pars_key_resolve.generate_par_keylist(keylist) + pkeylist = ParsKeyResolve.generate_par_keylist(keylist) assert pkeylist == keylist assert set( - pars_key_resolve.match_all_entries( + ParsKeyResolve.match_all_entries( pkeylist, {"cal": ["par_dsp"], "lar": ["par_dsp"]} )[1].apply ) == { "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", "lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.json", } - - -def test_pars_loading(): - pars_files = CalibCatalog.get_calib_files( - Path(par_dsp_path(setup)) / "validity.jsonl", "20230101T123456Z" - ) - assert pars_files == [ - 
"cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json" - ] - - par_override_files = CalibCatalog.get_calib_files( - Path(par_overwrite_path(setup)) / "dsp" / "validity.jsonl", "20230101T123456Z" - ) - - pars_files, pars_files_overwrite = pars_catalog.match_pars_files( - pars_files, par_override_files - ) - - assert pars_files == [ - "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json" - ] - - assert set(pars_catalog.get_par_file(setup, "20230101T123456Z", "dsp")) == { - ( - Path(par_dsp_path(setup)) - / "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", - ), - ( - Path(par_overwrite_path(setup)) - / "dsp/cal/p00/r000/l200-p00-r000-cal-T%-par_dsp_energy-overwrite.json", - ), - } From 385874e061d02238e15208c8a86fa135349e67c1 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 17:11:50 +0100 Subject: [PATCH 074/101] fix docs --- .readthedocs.yaml | 1 - docs/Makefile | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 103c066..d2e9f58 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -17,7 +17,6 @@ build: --module-first --force --output-dir docs/source/api - workflow/scripts workflow/src workflow/rules - .venv/bin/python -m sphinx -T -b html -d docs/_build/doctrees -D diff --git a/docs/Makefile b/docs/Makefile index b85f221..667234c 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -17,8 +17,7 @@ apidoc: clean-apidoc --module-first \ --force \ --output-dir "$(SOURCEDIR)/api" \ - ../workflow/src/legenddataflow \ - ../workflow/scripts \ + ../workflow/src \ ../workflow/rules clean-apidoc: From 902e248d49fe097b545beea6fcfc41757ff16f88 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 30 Jan 2025 17:12:40 +0100 Subject: [PATCH 075/101] fix coverage upload in ci --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 974d240..c080083 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -48,7 +48,7 @@ jobs: fetch-depth: 2 - uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.11" - name: Generate Report run: | From 0dec815c741129846e58288994dcaf9b4fb69718 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 31 Jan 2025 17:30:54 +0100 Subject: [PATCH 076/101] add timestamp to log files and don't remove on completion --- .pre-commit-config.yaml | 1 + workflow/Snakefile | 20 ++++---------------- workflow/Snakefile-build-raw | 3 +++ workflow/rules/ann.smk | 4 ++-- workflow/rules/blinding_calibration.smk | 2 +- workflow/rules/blinding_check.smk | 2 +- workflow/rules/dsp.smk | 2 +- workflow/rules/dsp_pars_geds.smk | 14 +++++++------- workflow/rules/evt.smk | 6 +++--- workflow/rules/hit.smk | 10 +++++----- workflow/rules/pht.smk | 16 ++++++++++------ workflow/rules/pht_fast.smk | 3 ++- workflow/rules/psp.smk | 2 +- workflow/rules/psp_pars_geds.smk | 7 ++++--- workflow/rules/qc_phy.smk | 3 ++- workflow/rules/raw.smk | 6 +++--- workflow/rules/skm.smk | 2 +- workflow/rules/tcm.smk | 4 ++-- workflow/src/legenddataflow/cal_grouping.py | 5 ++++- workflow/src/legenddataflow/patterns.py | 9 ++++++--- 20 files changed, 63 insertions(+), 58 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0611a74..8f713be 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -103,3 +103,4 @@ repos: hooks: - id: snakefmt files: Snakefile*|\.smk + exclude: channel_merge.smk diff --git a/workflow/Snakefile 
b/workflow/Snakefile index 9fa6950..50be710 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -32,10 +32,10 @@ det_status = utils.det_status_path(config) swenv = utils.runcmd(config) basedir = workflow.basedir -# wait for new pylegendmeta release -# if not Path(meta).exists(): -# meta = LegendMetadata() -# meta.checkout(config["configs"]["l200"]["legend_metadata_version"]) +time = datetime.now().strftime("%Y%m%dT%H%M%SZ") + +if not Path(meta).exists(): + LegendMetadata().checkout(config["legend_metadata_version"]) part = CalGrouping(config, Path(det_status) / "cal_groupings.yaml") @@ -146,18 +146,6 @@ onsuccess: if os.path.exists(utils.filelist_path(config)): os.rmdir(utils.filelist_path(config)) - # remove logs - files = glob.glob(os.path.join(utils.tmp_log_path(config), "*", "*.log")) - for file in files: - if os.path.isfile(file): - os.remove(file) - dirs = glob.glob(os.path.join(utils.tmp_log_path(config), "*")) - for d in dirs: - if os.path.isdir(d): - os.rmdir(d) - if os.path.exists(utils.tmp_log_path(config)): - os.rmdir(utils.tmp_log_path(config)) - rule gen_filelist: """Generate file list. diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw index fafd20c..da0d58d 100644 --- a/workflow/Snakefile-build-raw +++ b/workflow/Snakefile-build-raw @@ -10,6 +10,7 @@ import os, sys from pathlib import Path from legenddataflow import patterns as patt from legenddataflow import utils, execenv, ParsKeyResolve +from datetime import datetime utils.subst_vars_in_snakemake_config(workflow, config) @@ -18,6 +19,8 @@ swenv = execenv.execenv_prefix(config) meta_path = utils.metadata_path(config) det_status = utils.det_status_path(config) +time = datetime.now().strftime("%Y%m%dT%H%M%SZ") + if not Path(meta_path).exists(): LegendMetadata(meta_path).checkout(config["legend_metadata_version"]) diff --git a/workflow/rules/ann.smk b/workflow/rules/ann.smk index 2565514..d572b06 100644 --- a/workflow/rules/ann.smk +++ b/workflow/rules/ann.smk @@ -22,7 +22,7 @@ rule build_ann: tier_file=get_pattern_tier(setup, "ann", check_in_cycle=check_in_cycle), db_file=get_pattern_pars_tmp(setup, "ann_db"), log: - get_pattern_log(setup, "tier_ann"), + get_pattern_log(setup, "tier_ann", time), group: "tier-ann" resources: @@ -54,7 +54,7 @@ rule build_pan: tier_file=get_pattern_tier(setup, "pan", check_in_cycle=check_in_cycle), db_file=get_pattern_pars_tmp(setup, "pan_db"), log: - get_pattern_log(setup, "tier_pan"), + get_pattern_log(setup, "tier_pan", time), group: "tier-ann" resources: diff --git a/workflow/rules/blinding_calibration.smk b/workflow/rules/blinding_calibration.smk index b8076d7..537a051 100644 --- a/workflow/rules/blinding_calibration.smk +++ b/workflow/rules/blinding_calibration.smk @@ -31,7 +31,7 @@ rule build_blinding_calibration: par_file=temp(get_pattern_pars_tmp_channel(setup, "raw_blindcal")), plot_file=temp(get_pattern_plts_tmp_channel(setup, "raw_blindcal")), log: - get_pattern_log_channel(setup, "pars_hit_blind_cal"), + get_pattern_log_channel(setup, "pars_hit_blind_cal", time), group: "par-raw-blinding" resources: diff --git a/workflow/rules/blinding_check.smk b/workflow/rules/blinding_check.smk index b142c19..ba552fc 100644 --- a/workflow/rules/blinding_check.smk +++ b/workflow/rules/blinding_check.smk @@ -32,7 +32,7 @@ rule build_blinding_check: check_file=temp(get_pattern_pars_tmp_channel(setup, "raw")), plot_file=temp(get_pattern_plts_tmp_channel(setup, "raw")), log: - get_pattern_log_channel(setup, "pars_hit_blind_check"), + get_pattern_log_channel(setup, 
"pars_hit_blind_check", time), group: "par-hit" resources: diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index 501ed52..d40de3d 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -185,7 +185,7 @@ rule build_dsp: tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), db_file=get_pattern_pars_tmp(setup, "dsp_db"), log: - get_pattern_log(setup, "tier_dsp"), + get_pattern_log(setup, "tier_dsp", time), group: "tier-dsp" resources: diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index f526d6b..8f030cb 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -35,7 +35,7 @@ rule build_pars_dsp_tau_geds: decay_const=temp(get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant")), plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant")), log: - get_pattern_log_channel(setup, "par_dsp_decay_constant"), + get_pattern_log_channel(setup, "par_dsp_decay_constant", time), group: "par-dsp" resources: @@ -69,7 +69,7 @@ rule build_pars_evtsel_geds: output: peak_file=temp(get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5")), log: - get_pattern_log_channel(setup, "par_dsp_event_selection"), + get_pattern_log_channel(setup, "par_dsp_event_selection", time), group: "par-dsp" resources: @@ -108,7 +108,7 @@ rule build_pars_dsp_nopt_geds: ), plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization")), log: - get_pattern_log_channel(setup, "par_dsp_noise_optimization"), + get_pattern_log_channel(setup, "par_dsp_noise_optimization", time), group: "par-dsp" resources: @@ -148,7 +148,7 @@ rule build_pars_dsp_dplms_geds: ), plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "dplms")), log: - get_pattern_log_channel(setup, "pars_dsp_dplms"), + get_pattern_log_channel(setup, "pars_dsp_dplms", time), group: "par-dsp" resources: @@ -187,7 +187,7 @@ rule build_pars_dsp_eopt_geds: ), plots=temp(get_pattern_plts_tmp_channel(setup, "dsp")), log: - get_pattern_log_channel(setup, "pars_dsp_eopt"), + get_pattern_log_channel(setup, "pars_dsp_eopt", time), group: "par-dsp" resources: @@ -217,7 +217,7 @@ rule build_svm_dsp_geds: output: dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_dsp_svm").replace("{datatype}", "cal"), + get_pattern_log(setup, "pars_dsp_svm").replace("{datatype}", "cal", time), group: "par-dsp-svm" resources: @@ -238,7 +238,7 @@ rule build_pars_dsp_svm_geds: output: dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), log: - get_pattern_log_channel(setup, "pars_dsp_svm"), + get_pattern_log_channel(setup, "pars_dsp_svm", time), group: "par-dsp" resources: diff --git a/workflow/rules/evt.smk b/workflow/rules/evt.smk index d14b8cb..02b7849 100644 --- a/workflow/rules/evt.smk +++ b/workflow/rules/evt.smk @@ -35,7 +35,7 @@ rule build_evt: tier="evt", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, f"tier_evt"), + get_pattern_log(setup, f"tier_evt", time), group: "tier-evt" resources: @@ -88,7 +88,7 @@ rule build_pet: tier="pet", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, f"tier_pet"), + get_pattern_log(setup, f"tier_pet", time), group: "tier-evt" resources: @@ -140,7 +140,7 @@ for evt_tier in ("evt", "pet"): lh5concat_exe=setup["paths"]["install"] + "/bin/lh5concat", ro_input=lambda _, input: utils.as_ro(setup, input), log: - get_pattern_log_concat(setup, f"tier_{evt_tier}_concat"), + get_pattern_log_concat(setup, 
f"tier_{evt_tier}_concat", time), group: "tier-evt" shell: diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index 0af7590..1938779 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -53,7 +53,7 @@ rule build_qc: qc_file=temp(get_pattern_pars_tmp_channel(setup, "hit", "qc")), plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit", "qc")), log: - get_pattern_log_channel(setup, "pars_hit_qc"), + get_pattern_log_channel(setup, "pars_hit_qc", time), group: "par-hit" resources: @@ -102,7 +102,7 @@ rule build_energy_calibration: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit", "energy_cal")), log: - get_pattern_log_channel(setup, "pars_hit_energy_cal"), + get_pattern_log_channel(setup, "pars_hit_energy_cal", time), group: "par-hit" resources: @@ -151,7 +151,7 @@ rule build_aoe_calibration: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit", "aoe_cal")), log: - get_pattern_log_channel(setup, "pars_hit_aoe_cal"), + get_pattern_log_channel(setup, "pars_hit_aoe_cal", time), group: "par-hit" resources: @@ -198,7 +198,7 @@ rule build_lq_calibration: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit")), log: - get_pattern_log_channel(setup, "pars_hit_lq_cal"), + get_pattern_log_channel(setup, "pars_hit_lq_cal", time), group: "par-hit" resources: @@ -325,7 +325,7 @@ rule build_hit: tier="hit", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, "tier_hit"), + get_pattern_log(setup, "tier_hit", time), group: "tier-hit" resources: diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index 27e4f81..937cd27 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -117,6 +117,7 @@ for key, dataset in part.datasets.items(): partition, key, "pht", + time, name="par_pht_qc", ), group: @@ -171,7 +172,7 @@ rule build_pht_qc: hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "qc")), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "qc")), log: - get_pattern_log_channel(setup, "par_pht_qc"), + get_pattern_log_channel(setup, "par_pht_qc", time), group: "par-pht" resources: @@ -234,7 +235,7 @@ rule build_per_energy_calibration: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "energy_cal")), log: - get_pattern_log_channel(setup, "par_pht_energy_cal"), + get_pattern_log_channel(setup, "par_pht_energy_cal", time), group: "par-pht" resources: @@ -344,6 +345,7 @@ for key, dataset in part.datasets.items(): partition, key, "pht", + time, name="par_pht_partcal", ), group: @@ -406,7 +408,7 @@ rule build_pht_energy_super_calibrations: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "partcal")), log: - get_pattern_log_channel(setup, "par_pht_partcal"), + get_pattern_log_channel(setup, "par_pht_partcal", time), group: "par-pht" resources: @@ -526,6 +528,7 @@ for key, dataset in part.datasets.items(): partition, key, "pht", + time, name="par_pht_aoe", ), group: @@ -588,7 +591,7 @@ rule build_pht_aoe_calibrations: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "aoecal")), log: - get_pattern_log_channel(setup, "par_pht_aoe_cal"), + get_pattern_log_channel(setup, "par_pht_aoe_cal", time), group: "par-pht" resources: @@ -706,6 +709,7 @@ for key, dataset in part.datasets.items(): partition, key, "pht", + time, name="par_pht_lq", ), group: @@ -763,7 +767,7 @@ rule build_pht_lq_calibration: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht")), log: - get_pattern_log_channel(setup, "par_pht_lq_cal"), + get_pattern_log_channel(setup, "par_pht_lq_cal", 
time), group: "par-pht" resources: @@ -893,7 +897,7 @@ rule build_pht: tier="pht", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, "tier_pht"), + get_pattern_log(setup, "tier_pht", time), group: "tier-pht" resources: diff --git a/workflow/rules/pht_fast.smk b/workflow/rules/pht_fast.smk index 75d8e7e..b017e89 100644 --- a/workflow/rules/pht_fast.smk +++ b/workflow/rules/pht_fast.smk @@ -96,6 +96,7 @@ for key, dataset in part.datasets.items(): partition, key, "pht", + time, name="par_pht_fast", ), group: @@ -156,7 +157,7 @@ rule par_pht_fast: ), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht")), log: - get_pattern_log_channel(setup, "par_pht_fast"), + get_pattern_log_channel(setup, "par_pht_fast", time), group: "par-pht" resources: diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index d55fbcc..02df934 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -169,7 +169,7 @@ rule build_psp: tier_file=get_pattern_tier(setup, "psp", check_in_cycle=check_in_cycle), db_file=get_pattern_pars_tmp(setup, "psp_db"), log: - get_pattern_log(setup, "tier_psp"), + get_pattern_log(setup, "tier_psp", time), group: "tier-dsp" resources: diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk index 9e14cad..a7938f4 100644 --- a/workflow/rules/psp_pars_geds.smk +++ b/workflow/rules/psp_pars_geds.smk @@ -87,6 +87,7 @@ for key, dataset in part.datasets.items(): partition, key, "psp", + time, name="par_psp", ), group: @@ -134,7 +135,7 @@ rule build_par_psp: ), psp_plots=temp(get_pattern_plts_tmp_channel(setup, "psp")), log: - get_pattern_log_channel(setup, "pars_psp"), + get_pattern_log_channel(setup, "pars_psp", time), group: "par-psp" resources: @@ -174,7 +175,7 @@ rule build_svm_psp: output: dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_psp_svm").replace("{datatype}", "cal"), + get_pattern_log(setup, "pars_psp_svm", time).replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -195,7 +196,7 @@ rule build_pars_psp_svm: output: dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")), log: - get_pattern_log_channel(setup, "pars_dsp_svm"), + get_pattern_log_channel(setup, "pars_dsp_svm", time), group: "par-dsp" resources: diff --git a/workflow/rules/qc_phy.smk b/workflow/rules/qc_phy.smk index 982ab4e..b04f1ef 100644 --- a/workflow/rules/qc_phy.smk +++ b/workflow/rules/qc_phy.smk @@ -57,6 +57,7 @@ for key, dataset in part.datasets.items(): partition, key, "pht", + time, name="par_pht_qc_phy", ), group: @@ -100,7 +101,7 @@ rule build_pht_qc_phy: hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "qcphy")), plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "qcphy")), log: - get_pattern_log_channel(setup, "pars_pht_qc_phy"), + get_pattern_log_channel(setup, "pars_pht_qc_phy", time), group: "par-pht" resources: diff --git a/workflow/rules/raw.smk b/workflow/rules/raw.smk index f647095..4bd1e7a 100644 --- a/workflow/rules/raw.smk +++ b/workflow/rules/raw.smk @@ -32,7 +32,7 @@ rule build_raw_orca: output: get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), log: - get_pattern_log(setup, "tier_raw"), + get_pattern_log(setup, "tier_raw", time), group: "tier-raw" resources: @@ -62,7 +62,7 @@ rule build_raw_fcio: output: get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), log: - get_pattern_log(setup, "tier_raw"), + get_pattern_log(setup, "tier_raw", time), group: "tier-raw" resources: @@ -96,7 +96,7 @@ rule build_raw_blind: 
output: get_pattern_tier_raw_blind(setup), log: - str(get_pattern_log(setup, "tier_raw_blind")).replace("{datatype}", "phy"), + str(get_pattern_log(setup, "tier_raw_blind", time)).replace("{datatype}", "phy"), group: "tier-raw" resources: diff --git a/workflow/rules/skm.smk b/workflow/rules/skm.smk index d3c5d51..ac05738 100644 --- a/workflow/rules/skm.smk +++ b/workflow/rules/skm.smk @@ -20,7 +20,7 @@ rule build_skm: datatype="phy", ro_input=lambda _, input: ro(input), log: - get_pattern_log_concat(setup, "tier_skm"), + get_pattern_log_concat(setup, "tier_skm", time), group: "tier-skm" resources: diff --git a/workflow/rules/tcm.smk b/workflow/rules/tcm.smk index 6fa85a9..2bc1686 100644 --- a/workflow/rules/tcm.smk +++ b/workflow/rules/tcm.smk @@ -21,7 +21,7 @@ rule build_tier_tcm: output: get_pattern_tier(setup, "tcm", check_in_cycle=check_in_cycle), log: - get_pattern_log(setup, "tier_tcm"), + get_pattern_log(setup, "tier_tcm", time), group: "tier-tcm" resources: @@ -51,7 +51,7 @@ rule build_pulser_ids: output: pulser=temp(get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids")), log: - get_pattern_log_channel(setup, "tcm_pulsers"), + get_pattern_log_channel(setup, "tcm_pulsers", time), group: "tier-tcm" resources: diff --git a/workflow/src/legenddataflow/cal_grouping.py b/workflow/src/legenddataflow/cal_grouping.py index ce06c1d..5c19ea7 100644 --- a/workflow/src/legenddataflow/cal_grouping.py +++ b/workflow/src/legenddataflow/cal_grouping.py @@ -170,6 +170,7 @@ def get_log_file( dataset, channel, tier, + processing_timestamp, experiment="l200", datatype="cal", name=None, @@ -188,7 +189,9 @@ def get_log_file( fk.channel = "{channel}" else: fk.channel = channel - return fk.get_path_from_filekey(get_pattern_log_channel(self.setup, name))[0] + return fk.get_path_from_filekey( + get_pattern_log_channel(self.setup, name, processing_timestamp) + )[0] def get_timestamp( self, catalog, dataset, channel, tier, experiment="l200", datatype="cal" diff --git a/workflow/src/legenddataflow/patterns.py b/workflow/src/legenddataflow/patterns.py index 71f5db4..b05be0a 100644 --- a/workflow/src/legenddataflow/patterns.py +++ b/workflow/src/legenddataflow/patterns.py @@ -289,9 +289,10 @@ def get_pattern_plts(setup, tier, name=None): ) -def get_pattern_log(setup, processing_step): +def get_pattern_log(setup, processing_step, time): return ( Path(f"{tmp_log_path(setup)}") + / time / processing_step / ( "{experiment}-{period}-{run}-{datatype}-{timestamp}-" @@ -301,9 +302,10 @@ def get_pattern_log(setup, processing_step): ) -def get_pattern_log_channel(setup, processing_step): +def get_pattern_log_channel(setup, processing_step, time): return ( Path(f"{tmp_log_path(setup)}") + / time / processing_step / ( "{experiment}-{period}-{run}-cal-{timestamp}-{channel}-" @@ -313,9 +315,10 @@ def get_pattern_log_channel(setup, processing_step): ) -def get_pattern_log_concat(setup, processing_step): +def get_pattern_log_concat(setup, processing_step, time): return ( Path(f"{tmp_log_path(setup)}") + / time / processing_step / ("{experiment}-{period}-{run}-{datatype}-" + processing_step + ".log") ) From faf9214dbed37d3e2ff60953aed6237a8046a070 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 3 Feb 2025 23:06:50 +0100 Subject: [PATCH 077/101] setup to config and debug --- workflow/rules/ann.smk | 20 ++-- workflow/rules/blinding_calibration.smk | 18 ++-- workflow/rules/blinding_check.smk | 18 ++-- workflow/rules/chanlist_gen.smk | 42 ++++---- workflow/rules/common.smk | 28 +++--- workflow/rules/dsp.smk | 38 ++++---- 
workflow/rules/dsp_pars_geds.smk | 88 ++++++++--------- workflow/rules/evt.smk | 42 ++++---- workflow/rules/filelist_gen.smk | 36 +++---- workflow/rules/hit.smk | 96 +++++++++---------- workflow/rules/main.smk | 12 +-- workflow/rules/pht.smk | 122 ++++++++++++------------ workflow/rules/pht_fast.smk | 18 ++-- workflow/rules/psp.smk | 38 ++++---- workflow/rules/psp_pars_geds.smk | 42 ++++---- workflow/rules/qc_phy.smk | 18 ++-- workflow/rules/raw.smk | 26 ++--- workflow/rules/skm.smk | 6 +- workflow/rules/tcm.smk | 12 +-- 19 files changed, 361 insertions(+), 359 deletions(-) diff --git a/workflow/rules/ann.smk b/workflow/rules/ann.smk index d572b06..7a50005 100644 --- a/workflow/rules/ann.smk +++ b/workflow/rules/ann.smk @@ -13,16 +13,16 @@ from legenddataflow.patterns import ( rule build_ann: input: - dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), + dsp_file=get_pattern_tier(config, "dsp", check_in_cycle=False), pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"), params: timestamp="{timestamp}", datatype="{datatype}", output: - tier_file=get_pattern_tier(setup, "ann", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "ann_db"), + tier_file=get_pattern_tier(config, "ann", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "ann_db"), log: - get_pattern_log(setup, "tier_ann", time), + get_pattern_log(config, "tier_ann", time), group: "tier-ann" resources: @@ -30,7 +30,7 @@ rule build_ann: mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: "{swenv} python3 -B " - f"{workflow.source_path('../scripts/build_dsp.py')} " + "{basedir}/../scripts/build_dsp.py " "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -45,16 +45,16 @@ rule build_ann: rule build_pan: input: - dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False), + dsp_file=get_pattern_tier(config, "psp", check_in_cycle=False), pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"), params: timestamp="{timestamp}", datatype="{datatype}", output: - tier_file=get_pattern_tier(setup, "pan", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "pan_db"), + tier_file=get_pattern_tier(config, "pan", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "pan_db"), log: - get_pattern_log(setup, "tier_pan", time), + get_pattern_log(config, "tier_pan", time), group: "tier-ann" resources: @@ -62,7 +62,7 @@ rule build_pan: mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: "{swenv} python3 -B " - f"{workflow.source_path('../scripts/build_dsp.py')} " + "{basedir}/../scripts/build_dsp.py " "--log {log} " "--configs {configs} " "--metadata {meta} " diff --git a/workflow/rules/blinding_calibration.smk b/workflow/rules/blinding_calibration.smk index 537a051..8407893 100644 --- a/workflow/rules/blinding_calibration.smk +++ b/workflow/rules/blinding_calibration.smk @@ -20,7 +20,7 @@ rule build_blinding_calibration: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=Path(filelist_path(setup)) + files=Path(filelist_path(config)) / "all-{experiment}-{period}-{run}-cal-raw.filelist", params: timestamp="{timestamp}", @@ -28,10 +28,10 @@ rule build_blinding_calibration: channel="{channel}", meta=meta, output: - par_file=temp(get_pattern_pars_tmp_channel(setup, "raw_blindcal")), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "raw_blindcal")), + 
par_file=temp(get_pattern_pars_tmp_channel(config, "raw_blindcal")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "raw_blindcal")), log: - get_pattern_log_channel(setup, "pars_hit_blind_cal", time), + get_pattern_log_channel(config, "pars_hit_blind_cal", time), group: "par-raw-blinding" resources: @@ -53,7 +53,7 @@ rule build_blinding_calibration: rule build_plts_blinding: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, @@ -62,7 +62,7 @@ rule build_plts_blinding: name="blindcal", ), output: - get_pattern_plts(setup, "raw", name="blindcal"), + get_pattern_plts(config, "raw", name="blindcal"), group: "merge-blindcal" shell: @@ -75,7 +75,7 @@ rule build_plts_blinding: rule build_pars_blinding: input: infiles=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, @@ -83,9 +83,9 @@ rule build_pars_blinding: chan_maps, name="blindcal", ), - plts=get_pattern_plts(setup, "raw", name="blindcal"), + plts=get_pattern_plts(config, "raw", name="blindcal"), output: - get_pattern_pars(setup, "raw", name="blindcal", check_in_cycle=check_in_cycle), + get_pattern_pars(config, "raw", name="blindcal", check_in_cycle=check_in_cycle), group: "merge-blindcal" shell: diff --git a/workflow/rules/blinding_check.smk b/workflow/rules/blinding_check.smk index ba552fc..916009f 100644 --- a/workflow/rules/blinding_check.smk +++ b/workflow/rules/blinding_check.smk @@ -21,7 +21,7 @@ rule build_blinding_check: if so creates a file whose existence will be checked by the raw blinding before proceeding with blinding the phy data """ input: - files=Path(filelist_path(setup)) + files=Path(filelist_path(config)) / "all-{experiment}-{period}-{run}-cal-raw.filelist", par_file=get_blinding_curve_file, params: @@ -29,10 +29,10 @@ rule build_blinding_check: datatype="cal", channel="{channel}", output: - check_file=temp(get_pattern_pars_tmp_channel(setup, "raw")), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "raw")), + check_file=temp(get_pattern_pars_tmp_channel(config, "raw")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "raw")), log: - get_pattern_log_channel(setup, "pars_hit_blind_check", time), + get_pattern_log_channel(config, "pars_hit_blind_check", time), group: "par-hit" resources: @@ -55,7 +55,7 @@ rule build_blinding_check: rule build_plts_raw: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, @@ -63,7 +63,7 @@ rule build_plts_raw: chan_maps, ), output: - get_pattern_plts(setup, "raw"), + get_pattern_plts(config, "raw"), group: "merge-raw" shell: @@ -76,7 +76,7 @@ rule build_plts_raw: rule build_pars_raw: input: infiles=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "raw", basedir, @@ -84,11 +84,11 @@ rule build_pars_raw: chan_maps, ), plts=get_pattern_plts( - setup, + config, "raw", ), output: - get_pattern_pars(setup, "raw", check_in_cycle=check_in_cycle), + get_pattern_pars(config, "raw", check_in_cycle=check_in_cycle), group: "merge-raw" shell: diff --git a/workflow/rules/chanlist_gen.smk b/workflow/rules/chanlist_gen.smk index 4e46f13..06200e3 100644 --- a/workflow/rules/chanlist_gen.smk +++ 
b/workflow/rules/chanlist_gen.smk @@ -9,16 +9,11 @@ from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, ) -from legenddataflow.utils import filelist_path, runcmd +from legenddataflow import execenv_smk_py_script +from legenddataflow.utils import filelist_path -def get_par_chanlist( - setup, keypart, tier, basedir, det_status, chan_maps, name=None, extension="yaml" -): - tier_pattern = "((?P[^_]+)(\\_(?P[^_]+)(\\_(?P[^_]+)?)?)?)?" - keypart_rx = re.compile(tier_pattern) - d = keypart_rx.match(tier).groupdict() - +def get_chanlist(setup, keypart, workflow, config, det_status, chan_maps): key = ChannelProcKey.parse_keypart(keypart) flist_path = filelist_path(setup) @@ -28,37 +23,36 @@ def get_par_chanlist( f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" - cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" + cmd = "create_chankeylist" # execenv_smk_py_script(workflow, config, )[0] + cmd += f" --det_status {det_status} --channelmap {chan_maps} --timestamp {key.timestamp} " + cmd += f"--datatype cal --output_file {output_file}" os.system(cmd) with open(output_file) as r: chan_list = r.read().splitlines() + os.remove(output_file) + return chan_list + + +def get_par_chanlist( + setup, keypart, tier, basedir, det_status, chan_maps, name=None, extension="yaml" +): + + chan_list = get_chanlist(setup, keypart, workflow, config, det_status, chan_maps) par_pattern = get_pattern_pars_tmp_channel(setup, tier, name, extension) filenames = ChannelProcKey.get_channel_files(keypart, par_pattern, chan_list) - os.remove(output_file) + return filenames def get_plt_chanlist(setup, keypart, tier, basedir, det_status, chan_maps, name=None): - key = ChannelProcKey.parse_keypart(keypart) - - output_file = os.path.join( - filelist_path(setup), - f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", - ) - cmd = f"{runcmd(setup)} python3 -B {basedir}/scripts/create_chankeylist.py --det_status {det_status}" - cmd += f" --channelmap {chan_maps} --timestamp {key.timestamp} --datatype cal --output_file {output_file}" - os.system(cmd) - - with open(output_file) as r: - chan_list = r.read().splitlines() + chan_list = get_chanlist(setup, keypart, workflow, config, det_status, chan_maps) par_pattern = get_pattern_plts_tmp_channel(setup, tier, name) filenames = ChannelProcKey.get_channel_files(keypart, par_pattern, chan_list) - os.remove(output_file) + return filenames diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 17571e3..5a9bff2 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -10,20 +10,20 @@ from legenddataflow import utils def ro(path): - return utils.as_ro(setup, path) + return utils.as_ro(config, path) def get_blinding_curve_file(wildcards): """func to get the blinding calibration curves from the overrides""" par_files = Catalog.get_files( - Path(patt.par_overwrite_path(setup)) / "raw" / "validity.yaml", + Path(patt.par_overwrite_path(config)) / "raw" / "validity.yaml", wildcards.timestamp, ) if isinstance(par_files, str): - return str(Path(patt.par_overwrite_path(setup)) / "raw" / par_files) + return str(Path(patt.par_overwrite_path(config)) / "raw" / par_files) else: return [ - str(Path(patt.par_overwrite_path(setup)) / "raw" / 
par_file) + str(Path(patt.par_overwrite_path(config)) / "raw" / par_file) for par_file in par_files ] @@ -31,13 +31,13 @@ def get_blinding_curve_file(wildcards): def get_blinding_check_file(wildcards): """func to get the right blinding check file""" par_files = Catalog.get_files( - Path(patt.get_pars_path(setup, "raw")) / "validity.yaml", wildcards.timestamp + Path(patt.get_pars_path(config, "raw")) / "validity.yaml", wildcards.timestamp ) if isinstance(par_files, str): - return Path(patt.get_pars_path(setup, "raw")) / par_files + return Path(patt.get_pars_path(config, "raw")) / par_files else: return [ - Path(patt.get_pars_path(setup, "raw")) / par_file for par_file in par_files + Path(patt.get_pars_path(config, "raw")) / par_file for par_file in par_files ] @@ -67,19 +67,19 @@ def set_last_rule_name(workflow, new_name): def get_input_par_file(wildcards, tier, name): - par_overwrite_file = Path(patt.par_overwrite_path(setup)) / tier / "validity.yaml" + par_overwrite_file = Path(patt.par_overwrite_path(config)) / tier / "validity.yaml" pars_files_overwrite = Catalog.get_files( par_overwrite_file, wildcards.timestamp, ) for pars_file in pars_files_overwrite: if name in str(pars_file): - return Path(patt.par_overwrite_path(setup)) / tier / pars_file + return Path(patt.par_overwrite_path(config)) / tier / pars_file raise ValueError(f"Could not find model in {pars_files_overwrite}") def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): - par_overwrite_file = Path(patt.par_overwrite_path(setup)) / tier / "validity.yaml" + par_overwrite_file = Path(patt.par_overwrite_path(config)) / tier / "validity.yaml" if timestamp is not None: pars_files_overwrite = Catalog.get_files( par_overwrite_file, @@ -97,7 +97,7 @@ def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None): out_files = [] for pars_file in pars_files_overwrite: if fullname in str(pars_file): - out_files.append(Path(patt.par_overwrite_path(setup)) / tier / pars_file) + out_files.append(Path(patt.par_overwrite_path(config)) / tier / pars_file) if len(out_files) == 0: raise ValueError(f"Could not find name in {pars_files_overwrite}") else: @@ -109,8 +109,8 @@ def get_search_pattern(tier): This func gets the search pattern for the relevant tier passed. 
""" if tier == "daq": - return patt.get_pattern_tier_daq_unsorted(setup, extension="*") + return patt.get_pattern_tier_daq_unsorted(config, extension="*") elif tier == "raw": - return patt.get_pattern_tier_daq(setup, extension="*") + return patt.get_pattern_tier_daq(config, extension="*") else: - return patt.get_pattern_tier(setup, "raw", check_in_cycle=False) + return patt.get_pattern_tier(config, "raw", check_in_cycle=False) diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index d40de3d..8340dc8 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -18,11 +18,11 @@ from legenddataflow.patterns import ( dsp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier(config, "raw", check_in_cycle=False), {"cal": ["par_dsp"], "lar": ["par_dsp"]}, ) -dsp_par_cat_file = Path(pars_path(setup)) / "dsp" / "validity.yaml" +dsp_par_cat_file = Path(pars_path(config)) / "dsp" / "validity.yaml" if dsp_par_cat_file.is_file(): dsp_par_cat_file.unlink() Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) @@ -32,7 +32,7 @@ ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) rule build_plts_dsp: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, @@ -43,7 +43,7 @@ rule build_plts_dsp: timestamp="{timestamp}", datatype="cal", output: - get_pattern_plts(setup, "dsp"), + get_pattern_plts(config, "dsp"), group: "merge-dsp" shell: @@ -57,7 +57,7 @@ rule build_plts_dsp: rule build_pars_dsp_objects: input: lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, @@ -71,7 +71,7 @@ rule build_pars_dsp_objects: datatype="cal", output: get_pattern_pars( - setup, + config, "dsp", name="objects", extension="dir", @@ -91,7 +91,7 @@ rule build_pars_dsp_objects: rule build_pars_dsp_db: input: lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, @@ -104,7 +104,7 @@ rule build_pars_dsp_db: output: temp( get_pattern_pars_tmp( - setup, + config, "dsp", datatype="cal", ) @@ -123,7 +123,7 @@ rule build_pars_dsp_db: rule build_pars_dsp: input: in_files=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, @@ -133,13 +133,13 @@ rule build_pars_dsp: extension="lh5", ), in_db=get_pattern_pars_tmp( - setup, + config, "dsp", datatype="cal", ), - plts=get_pattern_plts(setup, "dsp"), + plts=get_pattern_plts(config, "dsp"), objects=get_pattern_pars( - setup, + config, "dsp", name="objects", extension="dir", @@ -150,12 +150,12 @@ rule build_pars_dsp: datatype="cal", output: out_file=get_pattern_pars( - setup, + config, "dsp", extension="lh5", check_in_cycle=check_in_cycle, ), - out_db=get_pattern_pars(setup, "dsp", check_in_cycle=check_in_cycle), + out_db=get_pattern_pars(config, "dsp", check_in_cycle=check_in_cycle), group: "merge-dsp" shell: @@ -171,10 +171,10 @@ rule build_pars_dsp: rule build_dsp: input: - raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), + raw_file=get_pattern_tier(config, "raw", check_in_cycle=False), pars_file=ancient( lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "dsp" + 
config, wildcards.timestamp, "dsp" ) ), params: @@ -182,10 +182,10 @@ rule build_dsp: datatype="{datatype}", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, output: - tier_file=get_pattern_tier(setup, "dsp", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "dsp_db"), + tier_file=get_pattern_tier(config, "dsp", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "dsp_db"), log: - get_pattern_log(setup, "tier_dsp", time), + get_pattern_log(config, "tier_dsp", time), group: "tier-dsp" resources: diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index 8f030cb..9fe1391 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -4,19 +4,19 @@ Snakemake rules for building dsp pars for HPGes, before running build_dsp() - extraction of energy filter parameters and charge trapping correction for each channel from cal data """ -from legenddataflow.create_pars_keylist import pars_key_resolve +from legenddataflow.create_pars_keylist import ParsKeyResolve from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, - get_pattern_tier_raw, + get_pattern_tier, get_pattern_log, get_pattern_pars, ) -dsp_par_catalog = pars_key_resolve.get_par_catalog( +dsp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(config, "raw", check_in_cycle=False), {"cal": ["par_dsp"], "lar": ["par_dsp"]}, ) @@ -24,18 +24,18 @@ dsp_par_catalog = pars_key_resolve.get_par_catalog( rule build_pars_dsp_tau_geds: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-cal-raw.filelist" ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - decay_const=temp(get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant")), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant")), + decay_const=temp(get_pattern_pars_tmp_channel(config, "dsp", "decay_constant")), + plots=temp(get_pattern_plts_tmp_channel(config, "dsp", "decay_constant")), log: - get_pattern_log_channel(setup, "par_dsp_decay_constant", time), + get_pattern_log_channel(config, "par_dsp_decay_constant", time), group: "par-dsp" resources: @@ -57,19 +57,19 @@ rule build_pars_dsp_tau_geds: rule build_pars_evtsel_geds: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-raw.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-cal-raw.filelist" ), - pulser_file=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), + pulser_file=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + database=get_pattern_pars_tmp_channel(config, "dsp", "decay_constant"), raw_cal=get_blinding_curve_file, params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - peak_file=temp(get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5")), + peak_file=temp(get_pattern_pars_tmp_channel(config, "dsp", "peaks", "lh5")), log: - get_pattern_log_channel(setup, "par_dsp_event_selection", time), + get_pattern_log_channel(config, "par_dsp_event_selection", time), group: "par-dsp" resources: @@ -94,21 +94,21 @@ rule build_pars_evtsel_geds: rule 
build_pars_dsp_nopt_geds: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-fft-raw.filelist" ), - database=get_pattern_pars_tmp_channel(setup, "dsp", "decay_constant"), - inplots=get_pattern_plts_tmp_channel(setup, "dsp", "decay_constant"), + database=get_pattern_pars_tmp_channel(config, "dsp", "decay_constant"), + inplots=get_pattern_plts_tmp_channel(config, "dsp", "decay_constant"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: dsp_pars_nopt=temp( - get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization") + get_pattern_pars_tmp_channel(config, "dsp", "noise_optimization") ), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization")), + plots=temp(get_pattern_plts_tmp_channel(config, "dsp", "noise_optimization")), log: - get_pattern_log_channel(setup, "par_dsp_noise_optimization", time), + get_pattern_log_channel(config, "par_dsp_noise_optimization", time), group: "par-dsp" resources: @@ -132,23 +132,23 @@ rule build_pars_dsp_nopt_geds: rule build_pars_dsp_dplms_geds: input: fft_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-fft-raw.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-fft-raw.filelist" ), - peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), - database=get_pattern_pars_tmp_channel(setup, "dsp", "noise_optimization"), - inplots=get_pattern_plts_tmp_channel(setup, "dsp", "noise_optimization"), + peak_file=get_pattern_pars_tmp_channel(config, "dsp", "peaks", "lh5"), + database=get_pattern_pars_tmp_channel(config, "dsp", "noise_optimization"), + inplots=get_pattern_plts_tmp_channel(config, "dsp", "noise_optimization"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp", "dplms")), + dsp_pars=temp(get_pattern_pars_tmp_channel(config, "dsp", "dplms")), lh5_path=temp( - get_pattern_pars_tmp_channel(setup, "dsp", "dplms", extension="lh5") + get_pattern_pars_tmp_channel(config, "dsp", "dplms", extension="lh5") ), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp", "dplms")), + plots=temp(get_pattern_plts_tmp_channel(config, "dsp", "dplms")), log: - get_pattern_log_channel(setup, "pars_dsp_dplms", time), + get_pattern_log_channel(config, "pars_dsp_dplms", time), group: "par-dsp" resources: @@ -173,21 +173,21 @@ rule build_pars_dsp_dplms_geds: # This rule builds the optimal energy filter parameters for the dsp using calibration dsp files rule build_pars_dsp_eopt_geds: input: - peak_file=get_pattern_pars_tmp_channel(setup, "dsp", "peaks", "lh5"), - decay_const=get_pattern_pars_tmp_channel(setup, "dsp", "dplms"), - inplots=get_pattern_plts_tmp_channel(setup, "dsp", "dplms"), + peak_file=get_pattern_pars_tmp_channel(config, "dsp", "peaks", "lh5"), + decay_const=get_pattern_pars_tmp_channel(config, "dsp", "dplms"), + inplots=get_pattern_plts_tmp_channel(config, "dsp", "dplms"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp_eopt")), + dsp_pars=temp(get_pattern_pars_tmp_channel(config, "dsp_eopt")), qbb_grid=temp( - get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl") + get_pattern_pars_tmp_channel(config, "dsp", "objects", extension="pkl") ), - plots=temp(get_pattern_plts_tmp_channel(setup, "dsp")), + plots=temp(get_pattern_plts_tmp_channel(config, "dsp")), log: - 
get_pattern_log_channel(setup, "pars_dsp_eopt", time), + get_pattern_log_channel(config, "pars_dsp_eopt", time), group: "par-dsp" resources: @@ -210,14 +210,16 @@ rule build_pars_dsp_eopt_geds: rule build_svm_dsp_geds: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "dsp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( wildcards, "dsp", "svm_hyperpars" + ), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "dsp", "svm_hyperpars") ).replace("hyperpars.json", "train.lh5"), output: - dsp_pars=get_pattern_pars(setup, "dsp", "svm", "pkl"), + dsp_pars=get_pattern_pars(config, "dsp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_dsp_svm").replace("{datatype}", "cal", time), + str(get_pattern_log(config, "pars_dsp_svm", time)).replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -233,12 +235,12 @@ rule build_svm_dsp_geds: rule build_pars_dsp_svm_geds: input: - dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp_eopt"), - svm_file=get_pattern_pars(setup, "dsp", "svm", "pkl"), + dsp_pars=get_pattern_pars_tmp_channel(config, "dsp_eopt"), + svm_file=get_pattern_pars(config, "dsp", "svm", "pkl"), output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "dsp")), + dsp_pars=temp(get_pattern_pars_tmp_channel(config, "dsp")), log: - get_pattern_log_channel(setup, "pars_dsp_svm", time), + get_pattern_log_channel(config, "pars_dsp_svm", time), group: "par-dsp" resources: diff --git a/workflow/rules/evt.smk b/workflow/rules/evt.smk index 02b7849..bb90ce8 100644 --- a/workflow/rules/evt.smk +++ b/workflow/rules/evt.smk @@ -13,29 +13,29 @@ from legenddataflow.patterns import ( rule build_evt: input: - dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), - hit_file=get_pattern_tier(setup, "hit", check_in_cycle=False), - tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + dsp_file=get_pattern_tier(config, "dsp", check_in_cycle=False), + hit_file=get_pattern_tier(config, "hit", check_in_cycle=False), + tcm_file=get_pattern_tier(config, "tcm", check_in_cycle=False), ann_file=lambda wildcards: ( None if int(wildcards["period"][1:]) > 11 - else get_pattern_tier(setup, "ann", check_in_cycle=False) + else get_pattern_tier(config, "ann", check_in_cycle=False) ), par_files=lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "hit" + config, wildcards.timestamp, "hit" ), xtalk_matrix=lambda wildcards: get_input_par_file( tier="evt", wildcards=wildcards, name="xtc" ), output: - get_pattern_tier(setup, "evt", check_in_cycle=check_in_cycle), + get_pattern_tier(config, "evt", check_in_cycle=check_in_cycle), params: timestamp="{timestamp}", datatype="{datatype}", tier="evt", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, f"tier_evt", time), + get_pattern_log(config, f"tier_evt", time), group: "tier-evt" resources: @@ -66,29 +66,29 @@ rule build_evt: rule build_pet: input: - dsp_file=get_pattern_tier(setup, "psp", check_in_cycle=False), - hit_file=get_pattern_tier(setup, "pht", check_in_cycle=False), - tcm_file=get_pattern_tier(setup, "tcm", check_in_cycle=False), + dsp_file=get_pattern_tier(config, "psp", check_in_cycle=False), + hit_file=get_pattern_tier(config, "pht", check_in_cycle=False), + tcm_file=get_pattern_tier(config, "tcm", check_in_cycle=False), ann_file=lambda wildcards: ( None if int(wildcards["period"][1:]) > 11 - else get_pattern_tier(setup, "pan", check_in_cycle=False) + else get_pattern_tier(config, "pan", 
check_in_cycle=False) ), par_files=lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "pht" + config, wildcards.timestamp, "pht" ), xtalk_matrix=lambda wildcards: get_input_par_file( tier="pet", wildcards=wildcards, name="xtc" ), output: - get_pattern_tier(setup, "pet", check_in_cycle=check_in_cycle), + get_pattern_tier(config, "pet", check_in_cycle=check_in_cycle), params: timestamp="{timestamp}", datatype="{datatype}", tier="pet", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, f"tier_pet", time), + get_pattern_log(config, f"tier_pet", time), group: "tier-evt" resources: @@ -126,21 +126,23 @@ for evt_tier in ("evt", "pet"): lambda wildcards: sorted( get_filelist_full_wildcards( wildcards, - setup, - get_pattern_tier_raw(setup), + config, + get_pattern_tier_raw(config), tier, ignore_keys_file=os.path.join(configs, "ignore_keys.keylist"), ) ), output: - get_pattern_tier(setup, f"{evt_tier}_concat", check_in_cycle=check_in_cycle), + get_pattern_tier( + config, f"{evt_tier}_concat", check_in_cycle=check_in_cycle + ), params: timestamp="all", datatype="{datatype}", - lh5concat_exe=setup["paths"]["install"] + "/bin/lh5concat", - ro_input=lambda _, input: utils.as_ro(setup, input), + lh5concat_exe=config["paths"]["install"] + "/bin/lh5concat", + ro_input=lambda _, input: utils.as_ro(config, input), log: - get_pattern_log_concat(setup, f"tier_{evt_tier}_concat", time), + get_pattern_log_concat(config, f"tier_{evt_tier}_concat", time), group: "tier-evt" shell: diff --git a/workflow/rules/filelist_gen.smk b/workflow/rules/filelist_gen.smk index 32d6175..d92a5aa 100644 --- a/workflow/rules/filelist_gen.smk +++ b/workflow/rules/filelist_gen.smk @@ -105,30 +105,30 @@ def get_keys(keypart): return filekeys -def get_pattern(setup, tier): +def get_pattern(config, tier): """ Helper function to get the search pattern for the given tier, some tiers such as skm need to refer to a different pattern when looking for files as only phy files are taken to skm others are only taken to pet """ if tier == "blind": - fn_pattern = patt.get_pattern_tier(setup, "raw", check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(config, "raw", check_in_cycle=False) elif tier in ("skm", "pet_concat"): - fn_pattern = patt.get_pattern_tier(setup, "pet", check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(config, "pet", check_in_cycle=False) elif tier == "evt_concat": - fn_pattern = patt.get_pattern_tier(setup, "evt", check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(config, "evt", check_in_cycle=False) elif tier == "daq": - fn_pattern = patt.get_pattern_tier_daq(setup, extension="{ext}") + fn_pattern = patt.get_pattern_tier_daq(config, extension="{ext}") else: - fn_pattern = patt.get_pattern_tier(setup, tier, check_in_cycle=False) + fn_pattern = patt.get_pattern_tier(config, tier, check_in_cycle=False) return fn_pattern -def concat_phy_filenames(setup, phy_filenames, tier): +def concat_phy_filenames(config, phy_filenames, tier): """ This function concatenates the files from the same run together """ - fn_pattern = patt.get_pattern(setup, tier) + fn_pattern = patt.get_pattern(config, tier) # group files by run sorted_phy_filenames = patt.run_grouper(phy_filenames) phy_filenames = [] @@ -136,7 +136,7 @@ def concat_phy_filenames(setup, phy_filenames, tier): for run in sorted_phy_filenames: key = FileKey.get_filekey_from_pattern(run[0], fn_pattern) out_key = FileKey.get_path_from_filekey( - key, patt.get_pattern_tier(setup, tier, 
check_in_cycle=False) + key, patt.get_pattern_tier(config, tier, check_in_cycle=False) )[0] phy_filenames.append(out_key) @@ -145,7 +145,7 @@ def concat_phy_filenames(setup, phy_filenames, tier): def build_filelist( - setup, + config, filekeys, search_pattern, tier, @@ -157,7 +157,7 @@ def build_filelist( and tier. It will ignore any keys in the ignore_keys list and only include the keys specified in the analysis_runs dict. """ - fn_pattern = get_pattern(setup, tier) + fn_pattern = get_pattern(config, tier) if ignore_keys is None: ignore_keys = [] @@ -177,11 +177,11 @@ def build_filelist( else: if tier == "blind" and _key.datatype in blind_datatypes: filename = FileKey.get_path_from_filekey( - _key, patt.get_pattern_tier_raw_blind(setup) + _key, patt.get_pattern_tier_raw_blind(config) ) elif tier == "skm": filename = FileKey.get_path_from_filekey( - _key, patt.get_pattern_tier(setup, "pet", check_in_cycle=False) + _key, patt.get_pattern_tier(config, "pet", check_in_cycle=False) ) elif tier == "daq": filename = FileKey.get_path_from_filekey( @@ -223,14 +223,14 @@ def build_filelist( if tier in concat_tiers: phy_filenames = concat_phy_filenames( - setup, phy_filenames, tier + config, phy_filenames, tier ) # concat phy files return phy_filenames + other_filenames def get_filelist( - wildcards, setup, search_pattern, ignore_keys_file=None, analysis_runs_file=None + wildcards, config, search_pattern, ignore_keys_file=None, analysis_runs_file=None ): file_selection = wildcards.label.split("-", 1)[0] # remove the file selection from the keypart @@ -242,7 +242,7 @@ def get_filelist( filekeys = get_keys(keypart) return build_filelist( - setup, + config, filekeys, search_pattern, wildcards.tier, @@ -253,7 +253,7 @@ def get_filelist( def get_filelist_full_wildcards( wildcards, - setup, + config, search_pattern, tier, ignore_keys_file=None, @@ -268,7 +268,7 @@ def get_filelist_full_wildcards( filekeys = get_keys(keypart) return build_filelist( - setup, + config, filekeys, search_pattern, tier, diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index 1938779..9ae6f77 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -23,11 +23,11 @@ from legenddataflow.patterns import ( hit_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier(config, "raw", check_in_cycle=False), {"cal": ["par_hit"], "lar": ["par_hit"]}, ) -hit_par_cat_file = Path(pars_path(setup)) / "hit" / "validity.yaml" +hit_par_cat_file = Path(pars_path(config)) / "hit" / "validity.yaml" if hit_par_cat_file.is_file(): hit_par_cat_file.unlink() Path(hit_par_cat_file).parent.mkdir(parents=True, exist_ok=True) @@ -38,22 +38,22 @@ ParsKeyResolve.write_to_yaml(hit_par_catalog, hit_par_cat_file) rule build_qc: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" ), fft_files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-fft-dsp.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-fft-dsp.filelist" ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), overwrite_files=lambda wildcards: get_overwrite_file("hit", wildcards), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - qc_file=temp(get_pattern_pars_tmp_channel(setup, "hit", "qc")), - 
plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit", "qc")), + qc_file=temp(get_pattern_pars_tmp_channel(config, "hit", "qc")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "qc")), log: - get_pattern_log_channel(setup, "pars_hit_qc", time), + get_pattern_log_channel(config, "pars_hit_qc", time), group: "par-hit" resources: @@ -79,30 +79,30 @@ rule build_qc: rule build_energy_calibration: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), ctc_dict=ancient( lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "dsp" + config, wildcards.timestamp, "dsp" ) ), - inplots=get_pattern_plts_tmp_channel(setup, "hit", "qc"), - in_hit_dict=get_pattern_pars_tmp_channel(setup, "hit", "qc"), + inplots=get_pattern_plts_tmp_channel(config, "hit", "qc"), + in_hit_dict=get_pattern_pars_tmp_channel(config, "hit", "qc"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - ecal_file=temp(get_pattern_pars_tmp_channel(setup, "hit", "energy_cal")), + ecal_file=temp(get_pattern_pars_tmp_channel(config, "hit", "energy_cal")), results_file=temp( get_pattern_pars_tmp_channel( - setup, "hit", "energy_cal_objects", extension="pkl" + config, "hit", "energy_cal_objects", extension="pkl" ) ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit", "energy_cal")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "energy_cal")), log: - get_pattern_log_channel(setup, "pars_hit_energy_cal", time), + get_pattern_log_channel(config, "pars_hit_energy_cal", time), group: "par-hit" resources: @@ -130,28 +130,28 @@ rule build_energy_calibration: rule build_aoe_calibration: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "hit", "energy_cal"), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "hit", "energy_cal"), eres_file=get_pattern_pars_tmp_channel( - setup, "hit", "energy_cal_objects", extension="pkl" + config, "hit", "energy_cal_objects", extension="pkl" ), - inplots=get_pattern_plts_tmp_channel(setup, "hit", "energy_cal"), + inplots=get_pattern_plts_tmp_channel(config, "hit", "energy_cal"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "hit", "aoe_cal")), + hit_pars=temp(get_pattern_pars_tmp_channel(config, "hit", "aoe_cal")), aoe_results=temp( get_pattern_pars_tmp_channel( - setup, "hit", "aoe_cal_objects", extension="pkl" + config, "hit", "aoe_cal_objects", extension="pkl" ) ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit", "aoe_cal")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "aoe_cal")), log: - get_pattern_log_channel(setup, "pars_hit_aoe_cal", time), + get_pattern_log_channel(config, "pars_hit_aoe_cal", time), group: "par-hit" resources: @@ -179,26 +179,26 @@ rule build_aoe_calibration: rule build_lq_calibration: input: files=os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + filelist_path(config), 
"all-{experiment}-{period}-{run}-cal-dsp.filelist" ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "hit", "aoe_cal"), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "hit", "aoe_cal"), eres_file=get_pattern_pars_tmp_channel( - setup, "hit", "aoe_cal_objects", extension="pkl" + config, "hit", "aoe_cal_objects", extension="pkl" ), - inplots=get_pattern_plts_tmp_channel(setup, "hit", "aoe_cal"), + inplots=get_pattern_plts_tmp_channel(config, "hit", "aoe_cal"), params: timestamp="{timestamp}", datatype="cal", channel="{channel}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "hit")), + hit_pars=temp(get_pattern_pars_tmp_channel(config, "hit")), lq_results=temp( - get_pattern_pars_tmp_channel(setup, "hit", "objects", extension="pkl") + get_pattern_pars_tmp_channel(config, "hit", "objects", extension="pkl") ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "hit")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit")), log: - get_pattern_log_channel(setup, "pars_hit_lq_cal", time), + get_pattern_log_channel(config, "pars_hit_lq_cal", time), group: "par-hit" resources: @@ -225,7 +225,7 @@ rule build_lq_calibration: rule build_pars_hit_objects: input: lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, @@ -236,7 +236,7 @@ rule build_pars_hit_objects: ), output: get_pattern_pars( - setup, + config, "hit", name="objects", extension="dir", @@ -257,7 +257,7 @@ rule build_pars_hit_objects: rule build_plts_hit: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, @@ -265,7 +265,7 @@ rule build_plts_hit: chan_maps, ), output: - get_pattern_plts(setup, "hit"), + get_pattern_plts(config, "hit"), params: ro_input=lambda _, input: ro(input), group: @@ -281,16 +281,16 @@ rule build_plts_hit: rule build_pars_hit: input: infiles=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "hit", basedir, det_status, chan_maps, ), - plts=get_pattern_plts(setup, "hit"), + plts=get_pattern_plts(config, "hit"), objects=get_pattern_pars( - setup, + config, "hit", name="objects", extension="dir", @@ -299,7 +299,7 @@ rule build_pars_hit: params: ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, output: - get_pattern_pars(setup, "hit", check_in_cycle=check_in_cycle), + get_pattern_pars(config, "hit", check_in_cycle=check_in_cycle), group: "merge-hit" shell: @@ -312,20 +312,20 @@ rule build_pars_hit: rule build_hit: input: - dsp_file=get_pattern_tier(setup, "dsp", check_in_cycle=False), + dsp_file=get_pattern_tier(config, "dsp", check_in_cycle=False), pars_file=lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "hit" + config, wildcards.timestamp, "hit" ), output: - tier_file=get_pattern_tier(setup, "hit", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "hit_db"), + tier_file=get_pattern_tier(config, "hit", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "hit_db"), params: timestamp="{timestamp}", datatype="{datatype}", tier="hit", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, "tier_hit", 
time), + get_pattern_log(config, "tier_hit", time), group: "tier-hit" resources: diff --git a/workflow/rules/main.smk b/workflow/rules/main.smk index e0d886e..a78784d 100644 --- a/workflow/rules/main.smk +++ b/workflow/rules/main.smk @@ -38,15 +38,15 @@ rule autogen_output: - generate lists of valid keys """ input: - filelist=os.path.join(filelist_path(setup), "{label}-{tier}.filelist"), + filelist=os.path.join(filelist_path(config), "{label}-{tier}.filelist"), output: gen_output="{label}-{tier}.gen", - summary_log=log_path(setup) + "/summary-{label}-{tier}-" + timestamp + ".log", - warning_log=log_path(setup) + "/warning-{label}-{tier}-" + timestamp + ".log", + summary_log=log_path(config) + "/summary-{label}-{tier}-" + timestamp + ".log", + warning_log=log_path(config) + "/warning-{label}-{tier}-" + timestamp + ".log", params: - valid_keys_path=os.path.join(pars_path(setup), "valid_keys"), - filedb_path=os.path.join(pars_path(setup), "filedb"), - setup=lambda wildcards: setup, + valid_keys_path=os.path.join(pars_path(config), "valid_keys"), + filedb_path=os.path.join(pars_path(config), "filedb"), + setup=lambda wildcards: config, basedir=basedir, threads: min(workflow.cores, 64) script: diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index 937cd27..dd1deb4 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -21,13 +21,13 @@ from legenddataflow.patterns import ( get_pattern_pars, ) -pht_par_catalog = ds.ParsKeyResolve.get_par_catalog( +pht_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier(config, "raw", check_in_cycle=False), {"cal": ["par_pht"], "lar": ["par_pht"]}, ) -pht_par_cat_file = Path(pars_path(setup)) / "pht" / "validity.yaml" +pht_par_cat_file = Path(pars_path(config)) / "pht" / "validity.yaml" if pht_par_cat_file.is_file(): pht_par_cat_file.unlink() Path(pht_par_cat_file).parent.mkdir(parents=True, exist_ok=True) @@ -39,11 +39,11 @@ intier = "psp" rule pht_checkpoint: input: files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", ), output: - temp(get_pattern_pars_tmp_channel(setup, "pht", "check")), + temp(get_pattern_pars_tmp_channel(config, "pht", "check")), shell: "touch {output}" @@ -154,25 +154,25 @@ for key, dataset in part.datasets.items(): rule build_pht_qc: input: cal_files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", ), fft_files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-fft-" + f"{intier}.filelist", ), - pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - check_file=get_pattern_pars_tmp_channel(setup, "pht", "check"), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + check_file=get_pattern_pars_tmp_channel(config, "pht", "check"), overwrite_files=lambda wildcards: get_overwrite_file("pht", wildcards=wildcards), params: datatype="cal", channel="{channel}", timestamp="{timestamp}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "qc")), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "qc")), + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "qc")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "qc")), log: - get_pattern_log_channel(setup, "par_pht_qc", time), + get_pattern_log_channel(config, "par_pht_qc", time), group: "par-pht" resources: @@ 
-210,15 +210,15 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_per_energy_calibration: input: files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", ), - pulser=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - pht_dict=get_pattern_pars_tmp_channel(setup, "pht", "qc"), - inplots=get_pattern_plts_tmp_channel(setup, "pht", "qc"), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + pht_dict=get_pattern_pars_tmp_channel(config, "pht", "qc"), + inplots=get_pattern_plts_tmp_channel(config, "pht", "qc"), ctc_dict=ancient( lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, intier + config, wildcards.timestamp, intier ) ), params: @@ -227,15 +227,15 @@ rule build_per_energy_calibration: channel="{channel}", tier="pht", output: - ecal_file=temp(get_pattern_pars_tmp_channel(setup, "pht", "energy_cal")), + ecal_file=temp(get_pattern_pars_tmp_channel(config, "pht", "energy_cal")), results_file=temp( get_pattern_pars_tmp_channel( - setup, "pht", "energy_cal_objects", extension="pkl" + config, "pht", "energy_cal_objects", extension="pkl" ) ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "energy_cal")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "energy_cal")), log: - get_pattern_log_channel(setup, "par_pht_energy_cal", time), + get_pattern_log_channel(config, "par_pht_energy_cal", time), group: "par-pht" resources: @@ -386,29 +386,29 @@ for key, dataset in part.datasets.items(): rule build_pht_energy_super_calibrations: input: files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-cal" + f"-{intier}.filelist", ), - pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "energy_cal"), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "energy_cal"), eres_file=get_pattern_pars_tmp_channel( - setup, "pht", "energy_cal_objects", extension="pkl" + config, "pht", "energy_cal_objects", extension="pkl" ), - inplots=get_pattern_plts_tmp_channel(setup, "pht", "energy_cal"), + inplots=get_pattern_plts_tmp_channel(config, "pht", "energy_cal"), params: datatype="cal", channel="{channel}", timestamp="{timestamp}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "partcal")), + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "partcal")), partcal_results=temp( get_pattern_pars_tmp_channel( - setup, "pht", "partcal_objects", extension="pkl" + config, "pht", "partcal_objects", extension="pkl" ) ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "partcal")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "partcal")), log: - get_pattern_log_channel(setup, "par_pht_partcal", time), + get_pattern_log_channel(config, "par_pht_partcal", time), group: "par-pht" resources: @@ -569,29 +569,29 @@ for key, dataset in part.datasets.items(): rule build_pht_aoe_calibrations: input: files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", ), - pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "partcal"), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "partcal"), 
eres_file=get_pattern_pars_tmp_channel( - setup, "pht", "partcal_objects", extension="pkl" + config, "pht", "partcal_objects", extension="pkl" ), - inplots=get_pattern_plts_tmp_channel(setup, "pht", "partcal"), + inplots=get_pattern_plts_tmp_channel(config, "pht", "partcal"), params: datatype="cal", channel="{channel}", timestamp="{timestamp}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "aoecal")), + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "aoecal")), aoe_results=temp( get_pattern_pars_tmp_channel( - setup, "pht", "aoecal_objects", extension="pkl" + config, "pht", "aoecal_objects", extension="pkl" ) ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "aoecal")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "aoecal")), log: - get_pattern_log_channel(setup, "par_pht_aoe_cal", time), + get_pattern_log_channel(config, "par_pht_aoe_cal", time), group: "par-pht" resources: @@ -747,27 +747,27 @@ for key, dataset in part.datasets.items(): rule build_pht_lq_calibration: input: files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", ), - pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "aoecal"), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "aoecal"), eres_file=get_pattern_pars_tmp_channel( - setup, "pht", "aoecal_objects", extension="pkl" + config, "pht", "aoecal_objects", extension="pkl" ), - inplots=get_pattern_plts_tmp_channel(setup, "pht", "aoecal"), + inplots=get_pattern_plts_tmp_channel(config, "pht", "aoecal"), params: datatype="cal", channel="{channel}", timestamp="{timestamp}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht")), + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht")), lq_results=temp( - get_pattern_pars_tmp_channel(setup, "pht", "objects", extension="pkl") + get_pattern_pars_tmp_channel(config, "pht", "objects", extension="pkl") ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht")), log: - get_pattern_log_channel(setup, "par_pht_lq_cal", time), + get_pattern_log_channel(config, "par_pht_lq_cal", time), group: "par-pht" resources: @@ -806,7 +806,7 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_pars_pht_objects: input: lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, @@ -817,7 +817,7 @@ rule build_pars_pht_objects: ), output: get_pattern_pars( - setup, + config, "pht", name="objects", extension="dir", @@ -835,7 +835,7 @@ rule build_pars_pht_objects: rule build_plts_pht: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, @@ -843,7 +843,7 @@ rule build_plts_pht: chan_maps, ), output: - get_pattern_plts(setup, "pht"), + get_pattern_plts(config, "pht"), group: "merge-hit" shell: @@ -856,23 +856,23 @@ rule build_plts_pht: rule build_pars_pht: input: infiles=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, det_status, chan_maps, ), - plts=get_pattern_plts(setup, "pht"), + plts=get_pattern_plts(config, 
"pht"), objects=get_pattern_pars( - setup, + config, "pht", name="objects", extension="dir", check_in_cycle=check_in_cycle, ), output: - get_pattern_pars(setup, "pht", check_in_cycle=check_in_cycle), + get_pattern_pars(config, "pht", check_in_cycle=check_in_cycle), group: "merge-hit" shell: @@ -884,20 +884,20 @@ rule build_pars_pht: rule build_pht: input: - dsp_file=get_pattern_tier(setup, intier, check_in_cycle=False), + dsp_file=get_pattern_tier(config, intier, check_in_cycle=False), pars_file=lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "pht" + config, wildcards.timestamp, "pht" ), output: - tier_file=get_pattern_tier(setup, "pht", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "pht_db"), + tier_file=get_pattern_tier(config, "pht", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "pht_db"), params: timestamp="{timestamp}", datatype="{datatype}", tier="pht", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, log: - get_pattern_log(setup, "tier_pht", time), + get_pattern_log(config, "tier_pht", time), group: "tier-pht" resources: diff --git a/workflow/rules/pht_fast.smk b/workflow/rules/pht_fast.smk index b017e89..75f7a47 100644 --- a/workflow/rules/pht_fast.smk +++ b/workflow/rules/pht_fast.smk @@ -137,27 +137,27 @@ for key, dataset in part.datasets.items(): rule par_pht_fast: input: files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-cal" + f"-{intier}.filelist", ), - pulser_files=get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(setup, "pht", "energy_cal"), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "energy_cal"), eres_file=get_pattern_pars_tmp_channel( - setup, "pht", "energy_cal_objects", extension="pkl" + config, "pht", "energy_cal_objects", extension="pkl" ), - inplots=get_pattern_plts_tmp_channel(setup, "pht", "energy_cal"), + inplots=get_pattern_plts_tmp_channel(config, "pht", "energy_cal"), params: datatype="cal", channel="{channel}", timestamp="{timestamp}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht")), + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht")), partcal_results=temp( - get_pattern_pars_tmp_channel(setup, "pht", "objects", extension="pkl") + get_pattern_pars_tmp_channel(config, "pht", "objects", extension="pkl") ), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht")), log: - get_pattern_log_channel(setup, "par_pht_fast", time), + get_pattern_log_channel(config, "par_pht_fast", time), group: "par-pht" resources: diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index 02df934..456d30a 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -17,11 +17,11 @@ from legenddataflow.patterns import ( psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier(config, "raw", check_in_cycle=False), {"cal": ["par_psp"], "lar": ["par_psp"]}, ) -psp_par_cat_file = Path(pars_path(setup)) / "psp" / "validity.yaml" +psp_par_cat_file = Path(pars_path(config)) / "psp" / "validity.yaml" if psp_par_cat_file.is_file(): psp_par_cat_file.unlink() Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) @@ -31,7 +31,7 @@ ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) rule build_pars_psp_objects: 
input: lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, @@ -42,7 +42,7 @@ rule build_pars_psp_objects: ), output: get_pattern_pars( - setup, + config, "psp", name="objects", extension="dir", @@ -61,7 +61,7 @@ rule build_pars_psp_objects: rule build_plts_psp: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, @@ -69,7 +69,7 @@ rule build_plts_psp: chan_maps, ), output: - get_pattern_plts(setup, "psp"), + get_pattern_plts(config, "psp"), group: "merge-psp" shell: @@ -83,7 +83,7 @@ rule build_plts_psp: rule build_pars_psp_db: input: lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "psp", basedir, @@ -93,7 +93,7 @@ rule build_pars_psp_db: output: temp( get_pattern_pars_tmp( - setup, + config, "psp", datatype="cal", ) @@ -111,7 +111,7 @@ rule build_pars_psp_db: rule build_pars_psp: input: in_files=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "dsp", basedir, @@ -121,13 +121,13 @@ rule build_pars_psp: extension="lh5", ), in_db=get_pattern_pars_tmp( - setup, + config, "psp", datatype="cal", ), - plts=get_pattern_plts(setup, "psp"), + plts=get_pattern_plts(config, "psp"), objects=get_pattern_pars( - setup, + config, "psp", name="objects", extension="dir", @@ -135,12 +135,12 @@ rule build_pars_psp: ), output: out_file=get_pattern_pars( - setup, + config, "psp", extension="lh5", check_in_cycle=check_in_cycle, ), - out_db=get_pattern_pars(setup, "psp", check_in_cycle=check_in_cycle), + out_db=get_pattern_pars(config, "psp", check_in_cycle=check_in_cycle), group: "merge-psp" shell: @@ -155,10 +155,10 @@ rule build_pars_psp: rule build_psp: input: - raw_file=get_pattern_tier(setup, "raw", check_in_cycle=False), + raw_file=get_pattern_tier(config, "raw", check_in_cycle=False), pars_file=ancient( lambda wildcards: ParsCatalog.get_par_file( - setup, wildcards.timestamp, "psp" + config, wildcards.timestamp, "psp" ) ), params: @@ -166,10 +166,10 @@ rule build_psp: datatype="{datatype}", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, output: - tier_file=get_pattern_tier(setup, "psp", check_in_cycle=check_in_cycle), - db_file=get_pattern_pars_tmp(setup, "psp_db"), + tier_file=get_pattern_tier(config, "psp", check_in_cycle=check_in_cycle), + db_file=get_pattern_pars_tmp(config, "psp_db"), log: - get_pattern_log(setup, "tier_psp", time), + get_pattern_log(config, "tier_psp", time), group: "tier-dsp" resources: diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk index a7938f4..6ac7e05 100644 --- a/workflow/rules/psp_pars_geds.smk +++ b/workflow/rules/psp_pars_geds.smk @@ -4,20 +4,20 @@ Snakemake rules for processing psp (partition dsp) tier data. 
- extraction of psd calibration parameters and partition level energy fitting for each channel over whole partition from cal data """ -from legenddataflow.pars_loading import pars_catalog -from legenddataflow.create_pars_keylist import pars_key_resolve -from legenddataflow.utils import par_psp_path, par_dsp_path, set_last_rule_name +from legenddataflow.create_pars_keylist import ParsKeyResolve +from legenddataflow.utils import set_last_rule_name from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_log_channel, get_pattern_log, get_pattern_pars, + get_pattern_tier, ) -psp_par_catalog = pars_key_resolve.get_par_catalog( +psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], - get_pattern_tier_raw(setup), + get_pattern_tier(config, "raw", check_in_cycle=False), {"cal": ["par_psp"], "lar": ["par_psp"]}, ) @@ -121,21 +121,21 @@ for key, dataset in part.datasets.items(): # This rule builds the a/e calibration using the calibration dsp files for the whole partition rule build_par_psp: input: - dsp_pars=get_pattern_pars_tmp_channel(setup, "dsp", "eopt"), - dsp_objs=get_pattern_pars_tmp_channel(setup, "dsp", "objects", extension="pkl"), - dsp_plots=get_pattern_plts_tmp_channel(setup, "dsp"), + dsp_pars=get_pattern_pars_tmp_channel(config, "dsp", "eopt"), + dsp_objs=get_pattern_pars_tmp_channel(config, "dsp", "objects", extension="pkl"), + dsp_plots=get_pattern_plts_tmp_channel(config, "dsp"), params: datatype="cal", channel="{channel}", timestamp="{timestamp}", output: - psp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp", "eopt")), + psp_pars=temp(get_pattern_pars_tmp_channel(config, "psp", "eopt")), psp_objs=temp( - get_pattern_pars_tmp_channel(setup, "psp", "objects", extension="pkl") + get_pattern_pars_tmp_channel(config, "psp", "objects", extension="pkl") ), - psp_plots=temp(get_pattern_plts_tmp_channel(setup, "psp")), + psp_plots=temp(get_pattern_plts_tmp_channel(config, "psp")), log: - get_pattern_log_channel(setup, "pars_psp", time), + get_pattern_log_channel(config, "pars_psp", time), group: "par-psp" resources: @@ -168,14 +168,16 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_svm_psp: input: - hyperpars=lambda wildcards: get_svm_file(wildcards, "psp", "svm_hyperpars"), - train_data=lambda wildcards: get_svm_file( + hyperpars=lambda wildcards: get_input_par_file( wildcards, "psp", "svm_hyperpars" + ), + train_data=lambda wildcards: str( + get_input_par_file(wildcards, "psp", "svm_hyperpars") ).replace("hyperpars.json", "train.lh5"), output: - dsp_pars=get_pattern_pars(setup, "psp", "svm", "pkl"), + dsp_pars=get_pattern_pars(config, "psp", "svm", "pkl"), log: - get_pattern_log(setup, "pars_psp_svm", time).replace("{datatype}", "cal"), + str(get_pattern_log(config, "pars_psp_svm", time)).replace("{datatype}", "cal"), group: "par-dsp-svm" resources: @@ -191,12 +193,12 @@ rule build_svm_psp: rule build_pars_psp_svm: input: - dsp_pars=get_pattern_pars_tmp_channel(setup, "psp_eopt"), - svm_model=get_pattern_pars(setup, "psp", "svm", "pkl"), + dsp_pars=get_pattern_pars_tmp_channel(config, "psp_eopt"), + svm_model=get_pattern_pars(config, "psp", "svm", "pkl"), output: - dsp_pars=temp(get_pattern_pars_tmp_channel(setup, "psp")), + dsp_pars=temp(get_pattern_pars_tmp_channel(config, "psp")), log: - get_pattern_log_channel(setup, "pars_dsp_svm", time), + get_pattern_log_channel(config, "pars_dsp_svm", time), group: "par-dsp" resources: diff --git a/workflow/rules/qc_phy.smk b/workflow/rules/qc_phy.smk index 
b04f1ef..522eb45 100644 --- a/workflow/rules/qc_phy.smk +++ b/workflow/rules/qc_phy.smk @@ -90,7 +90,7 @@ for key, dataset in part.datasets.items(): rule build_pht_qc_phy: input: phy_files=os.path.join( - filelist_path(setup), + filelist_path(config), "all-{experiment}-{period}-{run}-phy-" + f"{intier}.filelist", ), params: @@ -98,10 +98,10 @@ rule build_pht_qc_phy: channel="{channel}", timestamp="{timestamp}", output: - hit_pars=temp(get_pattern_pars_tmp_channel(setup, "pht", "qcphy")), - plot_file=temp(get_pattern_plts_tmp_channel(setup, "pht", "qcphy")), + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "qcphy")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "qcphy")), log: - get_pattern_log_channel(setup, "pars_pht_qc_phy", time), + get_pattern_log_channel(config, "pars_pht_qc_phy", time), group: "par-pht" resources: @@ -134,7 +134,7 @@ workflow._ruleorder.add(*rule_order_list) # [::-1] rule build_plts_pht_phy: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, @@ -143,7 +143,7 @@ rule build_plts_pht_phy: name="qcphy", ), output: - get_pattern_plts(setup, "pht", "qc_phy"), + get_pattern_plts(config, "pht", "qc_phy"), group: "merge-hit" shell: @@ -156,7 +156,7 @@ rule build_plts_pht_phy: rule build_pars_pht_phy: input: infiles=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", "pht", basedir, @@ -164,9 +164,9 @@ rule build_pars_pht_phy: chan_maps, name="qcphy", ), - plts=get_pattern_plts(setup, "pht", "qc_phy"), + plts=get_pattern_plts(config, "pht", "qc_phy"), output: - get_pattern_pars(setup, "pht", name="qc_phy", check_in_cycle=check_in_cycle), + get_pattern_pars(config, "pht", name="qc_phy", check_in_cycle=check_in_cycle), group: "merge-hit" shell: diff --git a/workflow/rules/raw.smk b/workflow/rules/raw.smk index 4bd1e7a..311c14c 100644 --- a/workflow/rules/raw.smk +++ b/workflow/rules/raw.smk @@ -11,9 +11,9 @@ from legenddataflow.create_pars_keylist import ParsKeyResolve raw_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], [ - get_pattern_tier_daq_unsorted(setup, extension="*"), - get_pattern_tier_daq(setup, extension="*"), - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier_daq_unsorted(config, extension="*"), + get_pattern_tier_daq(config, extension="*"), + get_pattern_tier(config, "raw", check_in_cycle=False), ], {"cal": ["par_raw"]}, ) @@ -24,15 +24,15 @@ rule build_raw_orca: This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file """ input: - get_pattern_tier_daq(setup, extension="orca"), + get_pattern_tier_daq(config, extension="orca"), params: timestamp="{timestamp}", datatype="{datatype}", ro_input=lambda _, input: ro(input), output: - get_pattern_tier(setup, "raw", check_in_cycle=check_in_cycle), + get_pattern_tier(config, "raw", check_in_cycle=check_in_cycle), log: - get_pattern_log(setup, "tier_raw", time), + get_pattern_log(config, "tier_raw", time), group: "tier-raw" resources: @@ -54,15 +54,15 @@ rule build_raw_fcio: This rule runs build_raw, it takes in a file.{daq_ext} and outputs a raw file """ input: - get_pattern_tier_daq(setup, extension="fcio"), + get_pattern_tier_daq(config, extension="fcio"), params: timestamp="{timestamp}", datatype="{datatype}", ro_input=lambda _, input: ro(input), output: - get_pattern_tier(setup, "raw", 
check_in_cycle=check_in_cycle), + get_pattern_tier(config, "raw", check_in_cycle=check_in_cycle), log: - get_pattern_log(setup, "tier_raw", time), + get_pattern_log(config, "tier_raw", time), group: "tier-raw" resources: @@ -85,7 +85,7 @@ rule build_raw_blind: and runs only if the blinding check file is on disk. Output is just the blinded raw file. """ input: - tier_file=str(get_pattern_tier(setup, "raw", check_in_cycle=False)).replace( + tier_file=str(get_pattern_tier(config, "raw", check_in_cycle=False)).replace( "{datatype}", "phy" ), blind_file=get_blinding_curve_file, @@ -94,9 +94,11 @@ rule build_raw_blind: datatype="phy", ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, output: - get_pattern_tier_raw_blind(setup), + get_pattern_tier_raw_blind(config), log: - str(get_pattern_log(setup, "tier_raw_blind", time)).replace("{datatype}", "phy"), + str(get_pattern_log(config, "tier_raw_blind", time)).replace( + "{datatype}", "phy" + ), group: "tier-raw" resources: diff --git a/workflow/rules/skm.smk b/workflow/rules/skm.smk index ac05738..91a8755 100644 --- a/workflow/rules/skm.smk +++ b/workflow/rules/skm.smk @@ -12,15 +12,15 @@ from legenddataflow.patterns import ( rule build_skm: input: - get_pattern_tier(setup, "pet_concat", check_in_cycle=False), + get_pattern_tier(config, "pet_concat", check_in_cycle=False), output: - get_pattern_tier(setup, "skm", check_in_cycle=check_in_cycle), + get_pattern_tier(config, "skm", check_in_cycle=check_in_cycle), params: timestamp="20230410T000000Z", datatype="phy", ro_input=lambda _, input: ro(input), log: - get_pattern_log_concat(setup, "tier_skm", time), + get_pattern_log_concat(config, "tier_skm", time), group: "tier-skm" resources: diff --git a/workflow/rules/tcm.smk b/workflow/rules/tcm.smk index 2bc1686..9d80d1b 100644 --- a/workflow/rules/tcm.smk +++ b/workflow/rules/tcm.smk @@ -13,15 +13,15 @@ from legenddataflow.patterns import ( # This rule builds the tcm files each raw file rule build_tier_tcm: input: - get_pattern_tier(setup, "raw", check_in_cycle=False), + get_pattern_tier(config, "raw", check_in_cycle=False), params: timestamp="{timestamp}", datatype="{datatype}", input=lambda _, input: ro(input), output: - get_pattern_tier(setup, "tcm", check_in_cycle=check_in_cycle), + get_pattern_tier(config, "tcm", check_in_cycle=check_in_cycle), log: - get_pattern_log(setup, "tier_tcm", time), + get_pattern_log(config, "tier_tcm", time), group: "tier-tcm" resources: @@ -41,7 +41,7 @@ rule build_tier_tcm: rule build_pulser_ids: input: os.path.join( - filelist_path(setup), "all-{experiment}-{period}-{run}-cal-tcm.filelist" + filelist_path(config), "all-{experiment}-{period}-{run}-cal-tcm.filelist" ), params: input=lambda _, input: ro(input), @@ -49,9 +49,9 @@ rule build_pulser_ids: datatype="cal", channel="{channel}", output: - pulser=temp(get_pattern_pars_tmp_channel(setup, "tcm", "pulser_ids")), + pulser=temp(get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids")), log: - get_pattern_log_channel(setup, "tcm_pulsers", time), + get_pattern_log_channel(config, "tcm_pulsers", time), group: "tier-tcm" resources: From 813e0709c7cfcb4ed9b209caa49ae45a2106fcfc Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 00:06:30 +0100 Subject: [PATCH 078/101] scripts to functions and rename --- .../scripts/blinding_calibration.py | 115 ----- .../src/legenddataflow/scripts/build_dsp.py | 167 -------- .../src/legenddataflow/scripts/build_evt.py | 182 -------- .../src/legenddataflow/scripts/build_fdb.py | 84 ---- 
.../src/legenddataflow/scripts/build_hit.py | 96 ----- .../legenddataflow/scripts/build_raw_blind.py | 181 -------- .../legenddataflow/scripts/build_raw_fcio.py | 68 --- .../legenddataflow/scripts/build_raw_orca.py | 108 ----- .../src/legenddataflow/scripts/build_skm.py | 96 ----- .../src/legenddataflow/scripts/build_tcm.py | 53 --- .../legenddataflow/scripts/check_blinding.py | 108 ----- .../legenddataflow/scripts/complete_run.py | 4 +- .../scripts/create_chankeylist.py | 42 +- workflow/src/legenddataflow/scripts/filedb.py | 88 ++++ .../legenddataflow/scripts/merge_channels.py | 252 +++++------ .../scripts/par/geds/dsp/dplms.py | 150 +++++++ .../scripts/par/geds/dsp/eopt.py | 398 ++++++++++++++++++ .../geds/dsp/evtsel.py} | 8 +- .../scripts/par/geds/dsp/nopt.py | 112 +++++ .../scripts/par/geds/dsp/svm.py | 26 ++ .../scripts/par/geds/dsp/svm_build.py | 63 +++ .../scripts/par/geds/dsp/tau.py | 146 +++++++ .../scripts/par/geds/hit/aoe.py | 262 ++++++++++++ .../geds/hit/ecal.py} | 10 +- .../legenddataflow/scripts/par/geds/hit/lq.py | 230 ++++++++++ .../{pars_hit_qc.py => par/geds/hit/qc.py} | 9 +- .../geds/pht/aoe.py} | 117 ++--- .../geds/pht/ecal_part.py} | 4 +- .../geds/pht/fast.py} | 14 +- .../{pars_pht_lqcal.py => par/geds/pht/lq.py} | 0 .../{pars_pht_qc.py => par/geds/pht/qc.py} | 7 +- .../geds/pht/qc_phy.py} | 7 +- .../scripts/par/geds/psp/average.py | 160 +++++++ .../par/geds/raw/blinding_calibration.py | 119 ++++++ .../scripts/par/geds/raw/check_blinding.py | 114 +++++ .../scripts/par/geds/tcm/pars_tcm_pulser.py | 58 +++ .../legenddataflow/scripts/par_psp_geds.py | 157 ------- .../scripts/pars_dsp_build_svm_geds.py | 57 --- .../scripts/pars_dsp_dplms_geds.py | 148 ------- .../scripts/pars_dsp_eopt_geds.py | 395 ----------------- .../scripts/pars_dsp_nopt_geds.py | 108 ----- .../scripts/pars_dsp_svm_geds.py | 20 - .../scripts/pars_dsp_tau_geds.py | 139 ------ .../legenddataflow/scripts/pars_hit_aoe.py | 290 ------------- .../src/legenddataflow/scripts/pars_hit_lq.py | 283 ------------- .../legenddataflow/scripts/pars_tcm_pulser.py | 57 --- .../src/legenddataflow/scripts/tier/dsp.py | 171 ++++++++ .../src/legenddataflow/scripts/tier/evt.py | 187 ++++++++ .../src/legenddataflow/scripts/tier/hit.py | 98 +++++ .../legenddataflow/scripts/tier/raw_blind.py | 185 ++++++++ .../legenddataflow/scripts/tier/raw_fcio.py | 72 ++++ .../legenddataflow/scripts/tier/raw_orca.py | 110 +++++ .../src/legenddataflow/scripts/tier/skm.py | 96 +++++ .../src/legenddataflow/scripts/tier/tcm.py | 55 +++ 54 files changed, 3115 insertions(+), 3171 deletions(-) delete mode 100644 workflow/src/legenddataflow/scripts/blinding_calibration.py delete mode 100644 workflow/src/legenddataflow/scripts/build_dsp.py delete mode 100644 workflow/src/legenddataflow/scripts/build_evt.py delete mode 100644 workflow/src/legenddataflow/scripts/build_fdb.py delete mode 100644 workflow/src/legenddataflow/scripts/build_hit.py delete mode 100644 workflow/src/legenddataflow/scripts/build_raw_blind.py delete mode 100644 workflow/src/legenddataflow/scripts/build_raw_fcio.py delete mode 100644 workflow/src/legenddataflow/scripts/build_raw_orca.py delete mode 100644 workflow/src/legenddataflow/scripts/build_skm.py delete mode 100644 workflow/src/legenddataflow/scripts/build_tcm.py delete mode 100644 workflow/src/legenddataflow/scripts/check_blinding.py create mode 100644 workflow/src/legenddataflow/scripts/filedb.py create mode 100644 workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py create mode 100644 
workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py rename workflow/src/legenddataflow/scripts/{pars_dsp_evtsel_geds.py => par/geds/dsp/evtsel.py} (98%) create mode 100644 workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py create mode 100644 workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py create mode 100644 workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py create mode 100644 workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py create mode 100644 workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py rename workflow/src/legenddataflow/scripts/{pars_hit_ecal.py => par/geds/hit/ecal.py} (99%) create mode 100644 workflow/src/legenddataflow/scripts/par/geds/hit/lq.py rename workflow/src/legenddataflow/scripts/{pars_hit_qc.py => par/geds/hit/qc.py} (98%) rename workflow/src/legenddataflow/scripts/{pars_pht_aoecal.py => par/geds/pht/aoe.py} (84%) rename workflow/src/legenddataflow/scripts/{pars_pht_partcal.py => par/geds/pht/ecal_part.py} (99%) rename workflow/src/legenddataflow/scripts/{pars_pht_fast.py => par/geds/pht/fast.py} (95%) rename workflow/src/legenddataflow/scripts/{pars_pht_lqcal.py => par/geds/pht/lq.py} (100%) rename workflow/src/legenddataflow/scripts/{pars_pht_qc.py => par/geds/pht/qc.py} (98%) rename workflow/src/legenddataflow/scripts/{pars_pht_qc_phy.py => par/geds/pht/qc_phy.py} (97%) create mode 100644 workflow/src/legenddataflow/scripts/par/geds/psp/average.py create mode 100644 workflow/src/legenddataflow/scripts/par/geds/raw/blinding_calibration.py create mode 100644 workflow/src/legenddataflow/scripts/par/geds/raw/check_blinding.py create mode 100644 workflow/src/legenddataflow/scripts/par/geds/tcm/pars_tcm_pulser.py delete mode 100644 workflow/src/legenddataflow/scripts/par_psp_geds.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_dsp_svm_geds.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_hit_aoe.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_hit_lq.py delete mode 100644 workflow/src/legenddataflow/scripts/pars_tcm_pulser.py create mode 100644 workflow/src/legenddataflow/scripts/tier/dsp.py create mode 100644 workflow/src/legenddataflow/scripts/tier/evt.py create mode 100644 workflow/src/legenddataflow/scripts/tier/hit.py create mode 100644 workflow/src/legenddataflow/scripts/tier/raw_blind.py create mode 100644 workflow/src/legenddataflow/scripts/tier/raw_fcio.py create mode 100644 workflow/src/legenddataflow/scripts/tier/raw_orca.py create mode 100644 workflow/src/legenddataflow/scripts/tier/skm.py create mode 100644 workflow/src/legenddataflow/scripts/tier/tcm.py diff --git a/workflow/src/legenddataflow/scripts/blinding_calibration.py b/workflow/src/legenddataflow/scripts/blinding_calibration.py deleted file mode 100644 index e4b79f2..0000000 --- a/workflow/src/legenddataflow/scripts/blinding_calibration.py +++ /dev/null @@ -1,115 +0,0 @@ -""" -This script applies a simple calibration to the daqenergy for all channels, -it does this using a peak search, matching the peaks to the given ones -and deriving a simple scaling relation from adc to keV. 
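Put differently, the whole calibration reduces to one multiplicative ADC-to-keV factor taken from the peak search (the script below stores it as roughpars[0] in the "daqenergy*a" expression). A minimal sketch of that scaling relation, with purely hypothetical numbers, for orientation only:

    # hypothetical illustration of the "simple scaling relation" described above
    peak_pos_adc = 13070.0      # assumed position of the 2614.5 keV peak in daqenergy ADC counts
    a = 2614.5 / peak_pos_adc   # keV per ADC count; the script's roughpars[0] plays this role
    daqenergy_cal = a * 5200.0  # applying the factor to an uncalibrated daqenergy value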
-""" - -import argparse -import logging -import pickle as pkl -from pathlib import Path - -import matplotlib as mpl -import matplotlib.pyplot as plt -import numpy as np -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from lgdo import lh5 -from pygama.pargen.energy_cal import HPGeCalibration - -mpl.use("agg") - -argparser = argparse.ArgumentParser() -argparser.add_argument("--files", help="files", nargs="*", type=str) - -argparser.add_argument("--blind_curve", help="blind_curve", type=str) -argparser.add_argument("--plot_file", help="out plot path", type=str) - -argparser.add_argument("--meta", help="meta", type=str) -argparser.add_argument("--configs", help="configs", type=str) -argparser.add_argument("--log", help="log", type=str) - -argparser.add_argument("--timestamp", help="timestamp", type=str) -argparser.add_argument("--datatype", help="datatype", type=str) -argparser.add_argument("--channel", help="channel", type=str) - -argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") -args = argparser.parse_args() - -logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("matplotlib").setLevel(logging.INFO) -log = logging.getLogger(__name__) - -# load in channel map -meta = LegendMetadata(args.meta, lazy=True) -chmap = meta.channelmap(args.timestamp) - -# if chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["is_blinded"] is True: -pars_dict = {} -# peaks to search for -peaks_keV = np.array( - [238, 583.191, 727.330, 860.564, 1592.53, 1620.50, 2103.53, 2614.50] -) - -E_uncal = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[0].view_as("np") -E_uncal = E_uncal[E_uncal > 200] -guess_keV = 2620 / np.nanpercentile(E_uncal, 99) # usual simple guess -Euc_min = peaks_keV[0] / guess_keV * 0.6 -Euc_max = peaks_keV[-1] / guess_keV * 1.1 -dEuc = 1 / guess_keV - -# daqenergy is an int so use integer binning (dx used to be bugged as output so switched to nbins) - - -hpge_cal = HPGeCalibration( - "daqenergy", - peaks_keV, - guess_keV, - 0, - uncal_is_int=True, - debug_mode=args.debug, -) - -# Run the rough peak search -detected_peaks_locs, detected_peaks_keV, roughpars = hpge_cal.hpge_find_E_peaks(E_uncal) - -log.info(f"{len(detected_peaks_locs)} peaks found:") -log.info("\t Energy | Position ") -for i, (Li, Ei) in enumerate(zip(detected_peaks_locs, detected_peaks_keV)): - log.info(f"\t{i}".ljust(4) + str(Ei).ljust(9) + f"| {Li:g}".ljust(5)) # noqa: G003 - -# dictionary to pass to build hit -out_dict = { - "pars": { - "operations": { - "daqenergy_cal": { - "expression": "daqenergy*a", - "parameters": {"a": round(roughpars[0], 5)}, - } - } - } -} - -# plot to check that the calibration is correct with zoom on 2.6 peak -fig = plt.figure(figsize=(8, 10)) -ax = plt.subplot(211) -ax.hist(E_uncal * roughpars[0], bins=np.arange(0, 3000, 1), histtype="step") -ax.set_ylabel("counts") -ax.set_yscale("log") -ax2 = plt.subplot(212) -ax2.hist( - E_uncal * roughpars[0], - bins=np.arange(2600, 2630, 1 * roughpars[0]), - histtype="step", -) -ax2.set_xlabel("energy (keV)") -ax2.set_ylabel("counts") -plt.suptitle(args.channel) -with Path(args.plot_file).open("wb") as w: - pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) -plt.close() - -Props.write_to_file(args.blind_curve, out_dict) diff --git a/workflow/src/legenddataflow/scripts/build_dsp.py
b/workflow/src/legenddataflow/scripts/build_dsp.py deleted file mode 100644 index 7e44bb6..0000000 --- a/workflow/src/legenddataflow/scripts/build_dsp.py +++ /dev/null @@ -1,167 +0,0 @@ -import argparse -import re -import time -import warnings -from pathlib import Path - -import numpy as np -from dbetto import TextDB -from dbetto.catalog import Props -from dspeed import build_dsp -from legendmeta import LegendMetadata -from lgdo import lh5 - -from ..log import build_log - - -def replace_list_with_array(dic): - for key, value in dic.items(): - if isinstance(value, dict): - dic[key] = replace_list_with_array(value) - elif isinstance(value, list): - dic[key] = np.array(value, dtype="float32") - else: - pass - return dic - - -warnings.filterwarnings(action="ignore", category=RuntimeWarning) - -argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs path", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--tier", help="Tier", type=str, required=True) - -argparser.add_argument( - "--pars_file", help="database file for detector", nargs="*", default=[] -) -argparser.add_argument("--input", help="input file", type=str) - -argparser.add_argument("--output", help="output file", type=str) -argparser.add_argument("--db_file", help="db file", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] -if args.tier in ["dsp", "psp"]: - config_dict = config_dict["tier_dsp"] -elif args.tier in ["ann", "pan"]: - config_dict = config_dict["tier_ann"] -else: - msg = f"Tier {args.tier} not supported" - raise ValueError(msg) - -log = build_log(config_dict, args.log) - -channel_dict = config_dict["inputs"]["processing_chain"] -settings_dict = config_dict["options"].get("settings", {}) -if isinstance(settings_dict, str): - settings_dict = Props.read_from(settings_dict) - -meta = LegendMetadata(path=args.metadata) -chan_map = meta.channelmap(args.timestamp, system=args.datatype) - -if args.tier in ["ann", "pan"]: - channel_dict = { - f"ch{chan_map[chan].daq.rawid:07}/dsp": Props.read_from(file) - for chan, file in channel_dict.items() - } -else: - channel_dict = { - f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) - for chan, file in channel_dict.items() - } -db_files = [ - par_file - for par_file in args.pars_file - if Path(par_file).suffix in (".json", ".yaml", ".yml") -] - -database_dic = Props.read_from(db_files, subst_pathvar=True) -database_dic = replace_list_with_array(database_dic) - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -rng = np.random.default_rng() -rand_num = f"{rng.integers(0, 99999):05d}" -temp_output = f"{args.output}.{rand_num}" - -start = time.time() - -build_dsp( - args.input, - temp_output, - {}, - database=database_dic, - chan_config=channel_dict, - write_mode="r", - buffer_len=settings_dict.get("buffer_len", 1000), - block_width=settings_dict.get("block_width", 16), -) - -log.info(f"build_dsp finished in {time.time()-start}") -Path(temp_output).rename(args.output) - -key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") - -if args.tier in ["dsp", "psp"]: - raw_channels = [ - channel for channel in 
lh5.ls(args.input) if re.match("(ch\\d{7})", channel) - ] - raw_fields = [ - field.split("/")[-1] for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/") - ] - - outputs = {} - channels = [] - for channel, chan_dict in channel_dict.items(): - output = chan_dict["outputs"] - in_dict = False - for entry in outputs: - if outputs[entry]["fields"] == output: - outputs[entry]["channels"].append(channel.split("/")[0]) - in_dict = True - if in_dict is False: - outputs[f"group{len(list(outputs))+1}"] = { - "channels": [channel.split("/")[0]], - "fields": output, - } - channels.append(channel.split("/")[0]) - - full_dict = { - "valid_fields": { - "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, - "dsp": outputs, - }, - "valid_keys": {key: {"valid_channels": {"raw": raw_channels, "dsp": channels}}}, - } -else: - outputs = {} - channels = [] - for channel, chan_dict in channel_dict.items(): - output = chan_dict["outputs"] - in_dict = False - for entry in outputs: - if outputs[entry]["fields"] == output: - outputs[entry]["channels"].append(channel.split("/")[0]) - in_dict = True - if in_dict is False: - outputs[f"group{len(list(outputs))+1}"] = { - "channels": [channel.split("/")[0]], - "fields": output, - } - channels.append(channel.split("/")[0]) - - full_dict = { - "valid_fields": { - "ann": outputs, - }, - "valid_keys": {key: {"valid_channels": {"ann": channels}}}, - } - -Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.db_file, full_dict) diff --git a/workflow/src/legenddataflow/scripts/build_evt.py b/workflow/src/legenddataflow/scripts/build_evt.py deleted file mode 100644 index b4723b4..0000000 --- a/workflow/src/legenddataflow/scripts/build_evt.py +++ /dev/null @@ -1,182 +0,0 @@ -import argparse -import json -import time -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -from dbetto import Props, TextDB -from legendmeta import LegendMetadata -from lgdo.types import Array -from pygama.evt import build_evt - -from ..log import build_log - -sto = lh5.LH5Store() - - -def find_matching_values_with_delay(arr1, arr2, jit_delay): - matching_values = [] - - # Create an array with all possible delay values - delays = np.arange(0, int(1e9 * jit_delay)) * jit_delay - - for delay in delays: - arr2_delayed = arr2 + delay - - # Find matching values and indices - mask = np.isin(arr1, arr2_delayed, assume_unique=True) - matching_values.extend(arr1[mask]) - - return np.unique(matching_values) - - -argparser = argparse.ArgumentParser() -argparser.add_argument("--hit_file", help="hit file", type=str) -argparser.add_argument("--dsp_file", help="dsp file", type=str) -argparser.add_argument("--tcm_file", help="tcm file", type=str) -argparser.add_argument("--ann_file", help="ann file") -argparser.add_argument("--xtc_file", help="xtc file", type=str) -argparser.add_argument("--par_files", help="par files", nargs="*") - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--tier", help="Tier", type=str, required=True) - -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata path", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - -argparser.add_argument("--output", help="output file", type=str) -args = argparser.parse_args() - -# load in config -configs = TextDB(args.configs, lazy=True) -if args.tier 
in ("evt", "pet"): - rule_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "tier_evt" - ] - -else: - msg = "unknown tier" - raise ValueError(msg) - -config_dict = rule_dict["inputs"] -evt_config_file = config_dict["evt_config"] - -log = build_log(rule_dict, args.log) - -meta = LegendMetadata(args.metadata, lazy=True) -chmap = meta.channelmap(args.timestamp) - -evt_config = Props.read_from(evt_config_file) - -if args.datatype in ("phy", "xtc"): - exp_string = evt_config["operations"]["geds___energy"]["expression"] - exp_string = exp_string.replace( - 'xtalk_matrix_filename=""', f'xtalk_matrix_filename="{args.xtc_file}"' - ) - exp_string = exp_string.replace( - 'cal_par_files=""', f"cal_par_files={args.par_files}" - ) - exp_string2 = exp_string.replace('return_mode="energy"', 'return_mode="tcm_index"') - - file_path_config = { - "operations": { - "geds___energy": {"expression": exp_string}, - "_geds___tcm_idx": {"expression": exp_string2}, - } - } - - log.debug(json.dumps(file_path_config, indent=2)) - - Props.add_to(evt_config, file_path_config) - -# block for snakemake to fill in channel lists -for field, dic in evt_config["channels"].items(): - if isinstance(dic, dict): - chans = chmap.map("system", unique=False)[dic["system"]] - if "selectors" in dic: - try: - for k, val in dic["selectors"].items(): - chans = chans.map(k, unique=False)[val] - except KeyError: - chans = None - if chans is not None: - chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] - else: - chans = [] - evt_config["channels"][field] = chans - -log.debug(json.dumps(evt_config["channels"], indent=2)) - -t_start = time.time() -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -file_table = { - "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), - "dsp": (args.dsp_file, "dsp", "ch{}"), - "hit": (args.hit_file, "hit", "ch{}"), - "evt": (None, "evt"), -} - -if args.ann_file is not None: - file_table["ann"] = (args.ann_file, "dsp", "ch{}") - -table = build_evt( - file_table, - evt_config, -) - -if "muon_config" in config_dict and config_dict["muon_config"] is not None: - muon_config = Props.read_from(config_dict["muon_config"]["evt_config"]) - field_config = Props.read_from(config_dict["muon_config"]["field_config"]) - # block for snakemake to fill in channel lists - for field, dic in muon_config["channels"].items(): - if isinstance(dic, dict): - chans = chmap.map("system", unique=False)[dic["system"]] - if "selectors" in dic: - try: - for k, val in dic["selectors"].items(): - chans = chans.map(k, unique=False)[val] - except KeyError: - chans = None - if chans is not None: - chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] - else: - chans = [] - muon_config["channels"][field] = chans - - trigger_timestamp = table[field_config["ged_timestamp"]["table"]][ - field_config["ged_timestamp"]["field"] - ].nda - if "hardware_tcm_2" in lh5.ls(args.tcm_file): - muon_table = build_evt( - { - "tcm": (args.tcm_file, "hardware_tcm_2", "ch{}"), - "dsp": (args.dsp_file, "dsp", "ch{}"), - "hit": (args.hit_file, "hit", "ch{}"), - "evt": (None, "evt"), - }, - muon_config, - ) - - muon_timestamp = muon_table[field_config["muon_timestamp"]["field"]].nda - muon_tbl_flag = muon_table[field_config["muon_flag"]["field"]].nda - if len(muon_timestamp[muon_tbl_flag]) > 0: - is_muon_veto_triggered = find_matching_values_with_delay( - trigger_timestamp, muon_timestamp[muon_tbl_flag], field_config["jitter"] - ) - muon_flag = np.isin(trigger_timestamp, is_muon_veto_triggered) - else: - 
muon_flag = np.zeros(len(trigger_timestamp), dtype=bool) - else: - muon_flag = np.zeros(len(trigger_timestamp), dtype=bool) - table[field_config["output_field"]["table"]].add_column( - field_config["output_field"]["field"], Array(muon_flag) - ) - -sto.write(obj=table, name="evt", lh5_file=args.output, wo_mode="a") - -t_elap = time.time() - t_start -log.info(f"Done! Time elapsed: {t_elap:.2f} sec.") diff --git a/workflow/src/legenddataflow/scripts/build_fdb.py b/workflow/src/legenddataflow/scripts/build_fdb.py deleted file mode 100644 index 93a3567..0000000 --- a/workflow/src/legenddataflow/scripts/build_fdb.py +++ /dev/null @@ -1,84 +0,0 @@ -import argparse -import logging -from pathlib import Path - -import numpy as np -from dbetto.catalog import Props -from lgdo import lh5 -from pygama.flow.file_db import FileDB - -argparser = argparse.ArgumentParser() -argparser.add_argument("--config", required=True) -argparser.add_argument("--scan-path", required=True) -argparser.add_argument("--output", required=True) -argparser.add_argument("--log") -argparser.add_argument("--assume-nonsparse", action="store_true") -args = argparser.parse_args() - -config = Props.read_from(args.config) - -if args.log is not None: - Path(args.log).parent.mkdir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") -else: - logging.basicConfig(level=logging.DEBUG) - -logging.getLogger("legendmeta").setLevel(logging.INFO) -logging.getLogger("numba").setLevel(logging.INFO) -logging.getLogger("parse").setLevel(logging.INFO) -logging.getLogger("lgdo").setLevel(logging.INFO) -logging.getLogger("h5py._conv").setLevel(logging.INFO) - -log = logging.getLogger(__name__) - -fdb = FileDB(config, scan=False) -fdb.scan_files([args.scan_path]) -fdb.scan_tables_columns(dir_files_conform=True) - -# augment dataframe with earliest timestamp found in file - -default = np.finfo("float64").max -timestamps = np.zeros(len(fdb.df), dtype="float64") - -for i, row in enumerate(fdb.df.itertuples()): - store = lh5.LH5Store( - base_path=f"{fdb.data_dir}/{fdb.tier_dirs['raw']}", keep_open=True - ) - - # list of first timestamps for each channel - loc_timestamps = np.full(len(row.raw_tables), fill_value=default, dtype="float64") - - msg = f"finding first timestamp in {fdb.data_dir}/{fdb.tier_dirs['raw']}/{row.raw_file}" - log.info(msg) - - found = False - for j, table in enumerate(row.raw_tables): - try: - loc_timestamps[j] = store.read( - fdb.table_format["raw"].format(ch=table) + "/timestamp", - row.raw_file.strip("/"), - n_rows=1, - )[0][0] - found = True - except KeyError: - pass - - if found and args.assume_nonsparse: - break - - if (loc_timestamps == default).all() or not found: - msg = "something went wrong! no valid first timestamp found" - raise RuntimeError(msg) - - timestamps[i] = np.min(loc_timestamps) - - msg = f"found {timestamps[i]}" - log.info(msg) - - if timestamps[i] < 0 or timestamps[i] > 4102444800: - msg = "something went wrong! 
timestamp does not make sense" - raise RuntimeError(msg) - -fdb.df["first_timestamp"] = timestamps - -fdb.to_disk(args.output, wo_mode="of") diff --git a/workflow/src/legenddataflow/scripts/build_hit.py b/workflow/src/legenddataflow/scripts/build_hit.py deleted file mode 100644 index 47b0fa0..0000000 --- a/workflow/src/legenddataflow/scripts/build_hit.py +++ /dev/null @@ -1,96 +0,0 @@ -import argparse -import time -from pathlib import Path - -from dbetto.catalog import Props -from legendmeta import LegendMetadata, TextDB -from lgdo import lh5 -from pygama.hit.build_hit import build_hit - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--input", help="input file", type=str) -argparser.add_argument("--pars_file", help="hit pars file", nargs="*") - -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--tier", help="Tier", type=str, required=True) - -argparser.add_argument("--output", help="output file", type=str) -argparser.add_argument("--db_file", help="db file", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True) -if args.tier == "hit" or args.tier == "pht": - config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "tier_hit" - ] -else: - msg = "unknown tier" - raise ValueError(msg) - -log = build_log(config_dict, args.log) - -channel_dict = config_dict["inputs"]["hit_config"] -settings_dict = config_dict["options"].get("settings", {}) -if isinstance(settings_dict, str): - settings_dict = Props.read_from(settings_dict) - -meta = LegendMetadata(path=args.metadata) -chan_map = meta.channelmap(args.timestamp, system=args.datatype) - -pars_dict = Props.read_from(args.pars_file) -pars_dict = {chan: chan_dict["pars"] for chan, chan_dict in pars_dict.items()} - -hit_dict = {} -channels_present = lh5.ls(args.input) -for channel in pars_dict: - chan_pars = pars_dict[channel].copy() - try: - detector = chan_map.map("daq.rawid")[int(channel[2:])].name - if detector in channel_dict: - cfg_dict = Props.read_from(channel_dict[detector]) - Props.add_to(cfg_dict, chan_pars) - chan_pars = cfg_dict - - if channel in channels_present: - hit_dict[f"{channel}/dsp"] = chan_pars - except KeyError: - pass - -t_start = time.time() -Path(args.output).parent.mkdir(parents=True, exist_ok=True) -build_hit(args.input, lh5_tables_config=hit_dict, outfile=args.output) -t_elap = time.time() - t_start -log.info(f"Done! 
Time elapsed: {t_elap:.2f} sec.") - -hit_outputs = {} -hit_channels = [] -for channel, file in channel_dict.items(): - output = Props.read_from(file)["outputs"] - in_dict = False - for entry in hit_outputs: - if hit_outputs[entry]["fields"] == output: - hit_outputs[entry]["channels"].append(channel) - in_dict = True - if in_dict is False: - hit_outputs[f"group{len(list(hit_outputs))+1}"] = { - "channels": [channel], - "fields": output, - } - hit_channels.append(channel) - -key = args.output.replace(f"-tier_{args.tier}.lh5", "") - -full_dict = { - "valid_fields": {args.tier: hit_outputs}, - "valid_keys": {key: {"valid_channels": {args.tier: hit_channels}}}, -} - -Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.db_file, full_dict) diff --git a/workflow/src/legenddataflow/scripts/build_raw_blind.py b/workflow/src/legenddataflow/scripts/build_raw_blind.py deleted file mode 100644 index 3d42717..0000000 --- a/workflow/src/legenddataflow/scripts/build_raw_blind.py +++ /dev/null @@ -1,181 +0,0 @@ -""" -This script takes in raw data, applies the calibration to the daqenergy -and uses this to blind the data in a window of Qbb +- 25 keV. It copies over all -channels in a raw file, removing those events that fall within the ROI for Ge detectors -that have a daqenergy calibration curve and are not anti-coincidence only (AC). It removes -the whole event from all of the Ge and SiPM channels. - -In the Snakemake dataflow, this script only runs if the checkfile is found on disk, -but this is controlled by the Snakemake flow (presumably an error is thrown if the file -is not found). This script itself does not check for the existence of such a file. -""" - -import argparse -from pathlib import Path - -import numexpr as ne -import numpy as np -from dbetto.catalog import Props -from legendmeta import LegendMetadata, TextDB -from lgdo import lh5 - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--input", help="input file", type=str) -argparser.add_argument("--output", help="output file", type=str) -argparser.add_argument( - "--blind_curve", help="blinding curves file", type=str, required=True, nargs="*" -) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--configs", help="config file", type=str) -argparser.add_argument("--chan_maps", help="chan map", type=str) -argparser.add_argument("--metadata", help="metadata", type=str) -argparser.add_argument("--log", help="log file", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "tier_raw" -] - -log = build_log(config_dict, args.log) - -channel_dict = config_dict["inputs"] -hdf_settings = Props.read_from(config_dict["settings"])["hdf5_settings"] -blinding_settings = Props.read_from(config_dict["config"]) - -centroid = blinding_settings["centroid_in_keV"] # keV -width = blinding_settings["width_in_keV"] # keV - -# list of all channels and objects in the raw file -all_channels = lh5.ls(args.input) - -# list of Ge channels and SiPM channels with associated metadata -legendmetadata = LegendMetadata(args.metadata, lazy=True) -ged_channels = ( - legendmetadata.channelmap(args.timestamp) - .map("system", unique=False)["geds"] - .map("daq.rawid") -) -spms_channels = ( - legendmetadata.channelmap(args.timestamp) - .map("system", 
unique=False)["spms"] - .map("daq.rawid") -) -auxs_channels = ( - legendmetadata.channelmap(args.timestamp) - .map("system", unique=False)["auxs"] - .map("daq.rawid") -) -blsn_channels = ( - legendmetadata.channelmap(args.timestamp) - .map("system", unique=False)["bsln"] - .map("daq.rawid") -) -puls_channels = ( - legendmetadata.channelmap(args.timestamp) - .map("system", unique=False)["puls"] - .map("daq.rawid") -) - -store = lh5.LH5Store() - -# rows that need blinding -toblind = np.array([]) - -# first, loop through the Ge detector channels, calibrate them and look for events that should be blinded -for chnum in list(ged_channels): - # skip Ge detectors that are anti-coincidence only or not able to be blinded for some other reason - if ged_channels[chnum]["analysis"]["is_blinded"] is False: - continue - - # load in just the daqenergy for now - daqenergy, _ = store.read(f"ch{chnum}/raw/daqenergy", args.input) - - # read in calibration curve for this channel - blind_curve = Props.read_from(args.blind_curve)[f"ch{chnum}"]["pars"]["operations"] - - # calibrate daq energy using pre existing curve - daqenergy_cal = ne.evaluate( - blind_curve["daqenergy_cal"]["expression"], - local_dict=dict( - daqenergy=daqenergy, **blind_curve["daqenergy_cal"]["parameters"] - ), - ) - - # figure out which event indices should be blinded - toblind = np.append( - toblind, np.nonzero(np.abs(np.asarray(daqenergy_cal) - centroid) <= width)[0] - ) - -# remove duplicates -toblind = np.unique(toblind) - -# total number of events (from last Ge channel loaded, should be same for all Ge channels) -allind = np.arange(len(daqenergy)) - -# gets events that should not be blinded -tokeep = allind[np.logical_not(np.isin(allind, toblind))] - -# make some temp file to write the output to before renaming it -rng = np.random.default_rng() -rand_num = f"{rng.integers(0,99999):05d}" -temp_output = f"{args.output}.{rand_num}" -Path(temp_output).parent.mkdir(parents=True, exist_ok=True) - -for channel in all_channels: - try: - chnum = int(channel[2::]) - except ValueError: - # if this isn't an interesting channel, just copy it to the output file - chobj, _ = store.read(channel, args.input, decompress=False) - store.write_object( - chobj, - channel, - lh5_file=temp_output, - wo_mode="w", - **hdf_settings, - ) - continue - - if ( - (chnum not in list(ged_channels)) - and (chnum not in list(spms_channels)) - and (chnum not in list(auxs_channels)) - and (chnum not in list(blsn_channels)) - and (chnum not in list(puls_channels)) - ): - # if this is a PMT or not included for some reason, just copy it to the output file - chobj, _ = store.read(channel + "/raw", args.input, decompress=False) - store.write_object( - chobj, - group=channel, - name="raw", - lh5_file=temp_output, - wo_mode="w", - **hdf_settings, - ) - continue - - # the rest should be the Ge and SiPM channels that need to be blinded - - # read in all of the data but only for the unblinded events - blinded_chobj, _ = store.read( - channel + "/raw", args.input, idx=tokeep, decompress=False - ) - - # now write the blinded data for this channel - store.write_object( - blinded_chobj, - group=channel, - name="raw", - lh5_file=temp_output, - wo_mode="w", - **hdf_settings, - ) - -# rename the temp file -Path(args.output).parent.mkdir(parents=True, exist_ok=True) -Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/build_raw_fcio.py b/workflow/src/legenddataflow/scripts/build_raw_fcio.py deleted file mode 100644 index 176565a..0000000 --- 
a/workflow/src/legenddataflow/scripts/build_raw_fcio.py +++ /dev/null @@ -1,68 +0,0 @@ -import argparse -from copy import deepcopy -from pathlib import Path - -import numpy as np -from daq2lh5 import build_raw -from dbetto import TextDB -from dbetto.catalog import Props - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("input", help="input file", type=str) -argparser.add_argument("output", help="output file", type=str) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--configs", help="config file", type=str) -argparser.add_argument("--chan_maps", help="chan map", type=str) -argparser.add_argument("--log", help="log file", type=str) -args = argparser.parse_args() - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -config_dict = ( - TextDB(args.configs, lazy=True) - .on(args.timestamp, system=args.datatype) - .snakemake_rules.tier_raw_fcio -) - -log = build_log(config_dict, args.log) - -channel_dict = config_dict.inputs -settings = Props.read_from(channel_dict.settings) -channel_dict = channel_dict.out_spec -all_config = Props.read_from(channel_dict.gen_config) - -chmap = TextDB(args.chan_maps, lazy=True).channelmaps.on(args.timestamp).group("system") - -if "geds_config" in channel_dict: - raise NotImplementedError() - -if "spms_config" in channel_dict: - spm_config = Props.read_from(channel_dict.spms_config) - spm_channels = chmap.spms.map("daq.rawid") - - for rawid, chinfo in spm_channels.items(): - cfg_block = deepcopy(spm_config["FCEventDecoder"]["__output_table_name__"]) - cfg_block["key_list"] = [chinfo.daq.fc_channel] - spm_config["FCEventDecoder"][f"ch{rawid:07d}/raw"] = cfg_block - - spm_config["FCEventDecoder"].pop("__output_table_name__") - - Props.add_to(all_config, spm_config) - -if "auxs_config" in channel_dict: - raise NotImplementedError() - -if "muon_config" in channel_dict: - raise NotImplementedError() - -rng = np.random.default_rng() -rand_num = f"{rng.integers(0,99999):05d}" -temp_output = f"{args.output}.{rand_num}" - -build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) - -# rename the temp file -Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/build_raw_orca.py b/workflow/src/legenddataflow/scripts/build_raw_orca.py deleted file mode 100644 index 72b5ac6..0000000 --- a/workflow/src/legenddataflow/scripts/build_raw_orca.py +++ /dev/null @@ -1,108 +0,0 @@ -import argparse -import logging -from pathlib import Path - -import numpy as np -from daq2lh5 import build_raw -from dbetto import TextDB -from dbetto.catalog import Props - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("input", help="input file", type=str) -argparser.add_argument("output", help="output file", type=str) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--configs", help="config file", type=str) -argparser.add_argument("--chan_maps", help="chan map", type=str) -argparser.add_argument("--log", help="log file") -args = argparser.parse_args() - -Path(args.log).parent.mkdir(parents=True, exist_ok=True) -logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -configs = TextDB(args.configs, 
lazy=True) -config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ - "tier_raw" -] - -log = build_log(config_dict, args.log) - -channel_dict = config_dict["inputs"] -settings = Props.read_from(channel_dict["settings"]) -channel_dict = channel_dict["out_spec"] -all_config = Props.read_from(channel_dict["gen_config"]) - -chmap = TextDB(args.chan_maps, lazy=True) - -if "geds_config" in list(channel_dict): - ged_config = Props.read_from(channel_dict["geds_config"]) - - ged_channels = list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["geds"] - .map("daq.rawid") - ) - - ged_config[next(iter(ged_config))]["geds"]["key_list"] = sorted(ged_channels) - Props.add_to(all_config, ged_config) - -if "spms_config" in list(channel_dict): - spm_config = Props.read_from(channel_dict["spms_config"]) - - spm_channels = list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["spms"] - .map("daq.rawid") - ) - - spm_config[next(iter(spm_config))]["spms"]["key_list"] = sorted(spm_channels) - Props.add_to(all_config, spm_config) - -if "auxs_config" in list(channel_dict): - aux_config = Props.read_from(channel_dict["auxs_config"]) - aux_channels = list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["auxs"] - .map("daq.rawid") - ) - aux_channels += list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["puls"] - .map("daq.rawid") - ) - aux_channels += list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["bsln"] - .map("daq.rawid") - ) - top_key = next(iter(aux_config)) - aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted( - aux_channels - ) - Props.add_to(all_config, aux_config) - -if "muon_config" in list(channel_dict): - muon_config = Props.read_from(channel_dict["muon_config"]) - muon_channels = list( - chmap.channelmaps.on(args.timestamp) - .map("system", unique=False)["muon"] - .map("daq.rawid") - ) - top_key = next(iter(muon_config)) - muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted( - muon_channels - ) - Props.add_to(all_config, muon_config) - -rng = np.random.default_rng() -rand_num = f"{rng.integers(0,99999):05d}" -temp_output = f"{args.output}.{rand_num}" - -build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) - -# rename the temp file -Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/build_skm.py b/workflow/src/legenddataflow/scripts/build_skm.py deleted file mode 100644 index 9411b1b..0000000 --- a/workflow/src/legenddataflow/scripts/build_skm.py +++ /dev/null @@ -1,96 +0,0 @@ -import argparse - -import awkward as ak -from dbetto import TextDB -from dbetto.catalog import Props -from lgdo import lh5 -from lgdo.types import Array, Struct, Table, VectorOfVectors - -from ..log import build_log - - -def get_all_out_fields(input_table, out_fields, current_field=""): - for key in input_table: - field = input_table[key] - key_string = f"{current_field}.{key}" - if isinstance(field, (Table, Struct)): - get_all_out_fields(field, out_fields, key_string) - else: - if key_string not in out_fields: - out_fields.append(key_string) - return out_fields - - -argparser = argparse.ArgumentParser() -argparser.add_argument("--evt_file", help="evt file", required=True) -argparser.add_argument("--configs", help="configs", required=True) -argparser.add_argument("--datatype", help="datatype", required=True) -argparser.add_argument("--timestamp", help="timestamp", required=True) 
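For orientation, the skimming step further down in this (now removed) script evaluates a config-supplied filter expression against the evt tier loaded as an awkward array, then keeps only the configured columns in a rectangular table. A minimal sketch of that pattern; the filter string and file path are invented here purely for illustration, only the calls mirror the script below:

from lgdo import lh5
from lgdo.types import Table

evt_filter = "evt.geds.multiplicity == 1"               # assumed example expression
evt = lh5.read_as("evt", "l200-example-evt.lh5", "ak")  # placeholder file name
skm = eval(f"evt[{evt_filter}]")                        # drop events failing the filter
out_table = Table(skm)                                  # rectangular LGDO table for the skm tier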
-argparser.add_argument("--log", help="log file", default=None) -argparser.add_argument("--output", help="output file", required=True) -args = argparser.parse_args() - -# load in config -config_dict = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype)[ - "snakemake_rules" -]["tier_skm"] - -log = build_log(config_dict, args.log) - - -skm_config_file = config_dict["inputs"]["skm_config"] -evt_filter = Props.read_from(skm_config_file)["evt_filter"] -out_fields = Props.read_from(skm_config_file)["keep_fields"] - -store = lh5.LH5Store() - -evt = lh5.read_as("evt", args.evt_file, "ak") - -# remove unwanted events -skm = eval(f"evt[{evt_filter}]") -# make it rectangular and make an LGDO Table -out_table = Table(skm) - -for field in out_fields: - items = field.split(".") - ptr1 = out_table - for item in items[:-1]: - ptr1 = ptr1[item] - - if isinstance(ptr1[items[-1]], Table): - out_fields.remove(field) - out_fields = get_all_out_fields( - ptr1[items[-1]], out_fields, current_field=field - ) - -# remove unwanted columns -out_table_skm = Table(size=len(out_table)) -for field in out_fields: - # table nesting is labeled by '.' in the config - items = field.split(".") - # get to actual nested field recursively - ptr1 = out_table - ptr2 = out_table_skm - for item in items[:-1]: - # make intermediate tables in new table - if item not in ptr2: - ptr2.add_field(item, Table(size=len(out_table))) - # get non-table LGDO recursively - ptr1 = ptr1[item] - ptr2 = ptr2[item] - - # finally add column to new table - if isinstance(ptr1[items[-1]], VectorOfVectors): - ptr2.add_field(items[-1], Array(ak.flatten(ptr1[items[-1]].view_as("ak")))) - else: - ptr2.add_field(items[-1], ptr1[items[-1]]) - attrs = ptr1[items[-1]].attrs - - # forward LGDO attributes - # attrs = evt[field.replace(".", "_")].attrs - for attr, val in attrs.items(): - if attr != "datatype": - ptr2.attrs[attr] = val - -# write-append to disk -store.write(out_table_skm, "skm", args.output, wo_mode="w") diff --git a/workflow/src/legenddataflow/scripts/build_tcm.py b/workflow/src/legenddataflow/scripts/build_tcm.py deleted file mode 100644 index 7e6ab73..0000000 --- a/workflow/src/legenddataflow/scripts/build_tcm.py +++ /dev/null @@ -1,53 +0,0 @@ -import argparse -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -from daq2lh5.orca import orca_flashcam -from dbetto import TextDB -from dbetto.catalog import Props -from pygama.evt.build_tcm import build_tcm - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("input", help="input file", type=str) -argparser.add_argument("output", help="output file", type=str) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--configs", help="config file", type=str) -argparser.add_argument("--log", help="log file", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["tier_tcm"] - -log = build_log(config_dict, args.log) - -settings = Props.read_from(config_dict["inputs"]["config"]) - -rng = np.random.default_rng() -temp_output = f"{args.output}.{rng.integers(0, 99999):05d}" -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -# get the list of channels by fcid -ch_list = lh5.ls(args.input, "/ch*") -fcid_channels = {} -for ch in ch_list: - key = int(ch[2:]) - fcid = 
orca_flashcam.get_fcid(key) - if fcid not in fcid_channels: - fcid_channels[fcid] = [] - fcid_channels[fcid].append(f"/{ch}/raw") - -# make a hardware_tcm_[fcid] for each fcid -for fcid, fcid_dict in fcid_channels.items(): - build_tcm( - [(args.input, fcid_dict)], - out_file=temp_output, - out_name=f"hardware_tcm_{fcid}", - wo_mode="o", - **settings, - ) - -Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/check_blinding.py b/workflow/src/legenddataflow/scripts/check_blinding.py deleted file mode 100644 index faf800d..0000000 --- a/workflow/src/legenddataflow/scripts/check_blinding.py +++ /dev/null @@ -1,108 +0,0 @@ -""" -This script checks that the blinding for a particular channel is still valid, -it does this by taking the calibration curve stored in the overrides, applying it -to the daqenergy, running a peak search over the calibrated energy and checking that -there are peaks within 5keV of the 583 and 2614 peaks. If the detector is in ac mode -then it will skip the check. -""" - -import argparse -import pickle as pkl -from pathlib import Path - -import matplotlib as mpl -import matplotlib.pyplot as plt -import numexpr as ne -import numpy as np -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from lgdo import lh5 -from pygama.math.histogram import get_hist -from pygama.pargen.energy_cal import get_i_local_maxima - -from ..log import build_log - -mpl.use("Agg") - -argparser = argparse.ArgumentParser() -argparser.add_argument("--files", help="files", nargs="*", type=str) -argparser.add_argument("--output", help="output file", type=str) -argparser.add_argument("--plot_file", help="plot file", type=str) -argparser.add_argument( - "--blind_curve", help="blinding curves file", nargs="*", type=str -) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--configs", help="config file", type=str) -argparser.add_argument("--channel", help="channel", type=str) -argparser.add_argument("--metadata", help="channel", type=str) -argparser.add_argument("--log", help="log file", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["tier_raw_blindcheck"] - -log = build_log(config_dict, args.log) - -# get the usability status for this channel -chmap = ( - LegendMetadata(args.metadata, lazy=True).channelmap(args.timestamp).map("daq.rawid") -) -det_status = chmap[int(args.channel[2:])]["analysis"]["is_blinded"] - -# read in calibration curve for this channel -blind_curve = Props.read_from(args.blind_curve)[args.channel]["pars"]["operations"] - -# load in the data -daqenergy = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[0].view_as( - "np" -) - -# calibrate daq energy using pre existing curve -daqenergy_cal = ne.evaluate( - blind_curve["daqenergy_cal"]["expression"], - local_dict=dict(daqenergy=daqenergy, **blind_curve["daqenergy_cal"]["parameters"]), -) - -# bin with 1 keV bins and get maxs -hist, bins, var = get_hist(daqenergy_cal, np.arange(0, 3000, 1)) -maxs = get_i_local_maxima(hist, delta=25) -log.info(f"peaks found at : {maxs}") - -# plot the energy spectrum to check calibration -fig = plt.figure(figsize=(8, 10)) -ax = plt.subplot(211) -ax.hist(daqenergy_cal, bins=np.arange(0, 3000, 1), histtype="step") -ax.set_ylabel("counts") -ax.set_yscale("log") 
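The pass/fail criterion applied at the end of this script boils down to: histogram the calibrated daqenergy in 1 keV bins, locate local maxima, and require a maximum within 5 keV of both the 583 keV and 2614 keV Tl-208 lines (detectors flagged as not blinded pass automatically). A condensed, standalone sketch using the same pygama helpers as in this script:

import numpy as np
from pygama.math.histogram import get_hist
from pygama.pargen.energy_cal import get_i_local_maxima

def blinding_still_valid(daqenergy_cal, is_blinded=True):
    # 1 keV binning, so the indices of the maxima are energies in keV
    hist, _, _ = get_hist(daqenergy_cal, np.arange(0, 3000, 1))
    maxs = get_i_local_maxima(hist, delta=25)
    peaks_ok = np.any(np.abs(maxs - 583) < 5) and np.any(np.abs(maxs - 2614) < 5)
    return peaks_ok or is_blinded is False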
-ax2 = plt.subplot(212) -ax2.hist( - daqenergy_cal, - bins=np.arange(2600, 2630, 1 * blind_curve["daqenergy_cal"]["parameters"]["a"]), - histtype="step", -) -ax2.set_xlabel("energy (keV)") -ax2.set_ylabel("counts") -plt.suptitle(args.channel) -with Path(args.plot_file).open("wb") as w: - pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) -plt.close() - -# check for peaks within +- 5keV of 2614 and 583 to ensure blinding still -# valid and if so create file else raise error. if detector is in ac mode it -# will always pass this check -if ( - np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) -) or det_status is False: - Path(args.output).mkdir(parents=True, exist_ok=True) - Props.write_to( - args.output, - { - "threshold_adc": np.nanmin(daqenergy), - "threshold_kev": np.nanmin(daqenergy_cal), - }, - ) -else: - msg = "peaks not found in daqenergy" - raise RuntimeError(msg) diff --git a/workflow/src/legenddataflow/scripts/complete_run.py b/workflow/src/legenddataflow/scripts/complete_run.py index e3892eb..eff7a90 100644 --- a/workflow/src/legenddataflow/scripts/complete_run.py +++ b/workflow/src/legenddataflow/scripts/complete_run.py @@ -7,9 +7,9 @@ import time from pathlib import Path +from .. import patterns +from .. import utils as ut from ..FileKey import FileKey -from . import patterns -from . import utils as ut print("INFO: dataflow ran successfully, now few final checks and scripts") diff --git a/workflow/src/legenddataflow/scripts/create_chankeylist.py b/workflow/src/legenddataflow/scripts/create_chankeylist.py index a75be8b..9566068 100644 --- a/workflow/src/legenddataflow/scripts/create_chankeylist.py +++ b/workflow/src/legenddataflow/scripts/create_chankeylist.py @@ -4,27 +4,29 @@ from dbetto import TextDB from legendmeta import LegendMetadata -argparser = argparse.ArgumentParser() -argparser.add_argument("--det_status", help="det_status", type=str, required=True) -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channelmap", help="Channel Map", type=str, required=True) -argparser.add_argument("--output_file", help="output_file", type=str, required=True) -args = argparser.parse_args() +def create_chankeylist() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--det_status", help="det_status", type=str, required=True) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channelmap", help="Channel Map", type=str, required=True) -det_status = TextDB(args.det_status, lazy=True) -status_map = det_status.statuses.on(args.timestamp, system=args.datatype) + argparser.add_argument("--output_file", help="output_file", type=str, required=True) + args = argparser.parse_args() -channel_map = LegendMetadata(args.channelmap, lazy=True) -chmap = channel_map.channelmaps.on(args.timestamp) + det_status = TextDB(args.det_status, lazy=True) + status_map = det_status.statuses.on(args.timestamp, system=args.datatype) -channels = [ - chan - for chan in status_map - if status_map[chan]["processable"] is True and chmap[chan].system == "geds" -] -Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) -with Path(args.output_file).open("w") as f: - for chan in channels: - f.write(f"{chan}\n") + channel_map = LegendMetadata(args.channelmap, lazy=True) + chmap = 
channel_map.channelmaps.on(args.timestamp) + + channels = [ + chan + for chan in status_map + if status_map[chan]["processable"] is True and chmap[chan].system == "geds" + ] + Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.output_file).open("w") as f: + for chan in channels: + f.write(f"{chan}\n") diff --git a/workflow/src/legenddataflow/scripts/filedb.py b/workflow/src/legenddataflow/scripts/filedb.py new file mode 100644 index 0000000..d9b52d8 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/filedb.py @@ -0,0 +1,88 @@ +import argparse +import logging +from pathlib import Path + +import numpy as np +from dbetto.catalog import Props +from lgdo import lh5 +from pygama.flow.file_db import FileDB + + +def build_filedb() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--config", required=True) + argparser.add_argument("--scan-path", required=True) + argparser.add_argument("--output", required=True) + argparser.add_argument("--log") + argparser.add_argument("--assume-nonsparse", action="store_true") + args = argparser.parse_args() + + config = Props.read_from(args.config) + + if args.log is not None: + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") + else: + logging.basicConfig(level=logging.DEBUG) + + logging.getLogger("legendmeta").setLevel(logging.INFO) + logging.getLogger("numba").setLevel(logging.INFO) + logging.getLogger("parse").setLevel(logging.INFO) + logging.getLogger("lgdo").setLevel(logging.INFO) + logging.getLogger("h5py._conv").setLevel(logging.INFO) + + log = logging.getLogger(__name__) + + fdb = FileDB(config, scan=False) + fdb.scan_files([args.scan_path]) + fdb.scan_tables_columns(dir_files_conform=True) + + # augment dataframe with earliest timestamp found in file + + default = np.finfo("float64").max + timestamps = np.zeros(len(fdb.df), dtype="float64") + + for i, row in enumerate(fdb.df.itertuples()): + store = lh5.LH5Store( + base_path=f"{fdb.data_dir}/{fdb.tier_dirs['raw']}", keep_open=True + ) + + # list of first timestamps for each channel + loc_timestamps = np.full( + len(row.raw_tables), fill_value=default, dtype="float64" + ) + + msg = f"finding first timestamp in {fdb.data_dir}/{fdb.tier_dirs['raw']}/{row.raw_file}" + log.info(msg) + + found = False + for j, table in enumerate(row.raw_tables): + try: + loc_timestamps[j] = store.read( + fdb.table_format["raw"].format(ch=table) + "/timestamp", + row.raw_file.strip("/"), + n_rows=1, + )[0][0] + found = True + except KeyError: + pass + + if found and args.assume_nonsparse: + break + + if (loc_timestamps == default).all() or not found: + msg = "something went wrong! no valid first timestamp found" + raise RuntimeError(msg) + + timestamps[i] = np.min(loc_timestamps) + + msg = f"found {timestamps[i]}" + log.info(msg) + + if timestamps[i] < 0 or timestamps[i] > 4102444800: + msg = "something went wrong! 
timestamp does not make sense" + raise RuntimeError(msg) + + fdb.df["first_timestamp"] = timestamps + + fdb.to_disk(args.output, wo_mode="of") diff --git a/workflow/src/legenddataflow/scripts/merge_channels.py b/workflow/src/legenddataflow/scripts/merge_channels.py index d6fec7a..6fee6f5 100644 --- a/workflow/src/legenddataflow/scripts/merge_channels.py +++ b/workflow/src/legenddataflow/scripts/merge_channels.py @@ -24,138 +24,142 @@ def replace_path(d, old_path, new_path): return d -argparser = argparse.ArgumentParser() -argparser.add_argument("--input", help="input file", nargs="*", type=str, required=True) -argparser.add_argument("--output", help="output file", type=str, required=True) -argparser.add_argument( - "--in_db", - help="in db file (used for when lh5 files referred to in db)", - type=str, - required=False, -) -argparser.add_argument( - "--out_db", - help="lh5 file (used for when lh5 files referred to in db)", - type=str, - required=False, -) -argparser.add_argument( - "--channelmap", - help="channelmap", - type=str, - required=False, - default=None, -) -argparser.add_argument( - "--timestamp", - help="timestamp", - type=str, - required=False, -) -args = argparser.parse_args() - -# change to only have 1 output file for multiple inputs -# don't care about processing step, check if extension matches - -channel_files = args.input.infiles if hasattr(args.input, "infiles") else args.input - -file_extension = Path(args.output).suffix - -if args.channelmap is not None: - channel_map = LegendMetadata(args.channelmap, lazy=True) - chmap = channel_map.channelmap(args.timestamp) -else: - chmap = None - -if file_extension == ".dat" or file_extension == ".dir": - out_file = Path(args.output).with_suffix("") -else: - out_file = args.output - -rng = np.random.default_rng() -temp_output = f"{out_file}.{rng.integers(0, 99999):05d}" - -Path(args.output).parent.mkdir(parents=True, exist_ok=True) - -if file_extension in (".json", ".yaml", ".yml"): - out_dict = {} - for channel in channel_files: - if Path(channel).suffix == file_extension: - channel_dict = Props.read_from(channel) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - if chmap is not None: - channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" +def merge_channels() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument( + "--input", help="input file", nargs="*", type=str, required=True + ) + argparser.add_argument("--output", help="output file", type=str, required=True) + argparser.add_argument( + "--in_db", + help="in db file (used for when lh5 files referred to in db)", + type=str, + required=False, + ) + argparser.add_argument( + "--out_db", + help="lh5 file (used for when lh5 files referred to in db)", + type=str, + required=False, + ) + argparser.add_argument( + "--channelmap", + help="channelmap", + type=str, + required=False, + default=None, + ) + argparser.add_argument( + "--timestamp", + help="timestamp", + type=str, + required=False, + ) + args = argparser.parse_args() + + # change to only have 1 output file for multiple inputs + # don't care about processing step, check if extension matches + + channel_files = args.input.infiles if hasattr(args.input, "infiles") else args.input + + file_extension = Path(args.output).suffix + + if args.channelmap is not None: + channel_map = LegendMetadata(args.channelmap, lazy=True) + chmap = channel_map.channelmap(args.timestamp) + else: + chmap = None + + if file_extension == ".dat" or file_extension == ".dir": + out_file = 
Path(args.output).with_suffix("") + else: + out_file = args.output + + rng = np.random.default_rng() + temp_output = f"{out_file}.{rng.integers(0, 99999):05d}" + + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + + if file_extension in (".json", ".yaml", ".yml"): + out_dict = {} + for channel in channel_files: + if Path(channel).suffix == file_extension: + channel_dict = Props.read_from(channel) + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + channel_name = fkey.channel + out_dict[channel_name] = channel_dict else: - channel_name = fkey.channel - out_dict[channel_name] = channel_dict - else: - msg = "Output file extension does not match input file extension" - raise RuntimeError(msg) - - Props.write_to(out_file, out_dict) - -elif file_extension == ".pkl": - out_dict = {} - for channel in channel_files: - with Path(channel).open("rb") as r: - channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) - if chmap is not None: - channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" - else: - channel_name = fkey.channel - out_dict[channel_name] = channel_dict - - with Path(temp_output).open("wb") as w: - pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - - Path(temp_output).rename(out_file) - -elif file_extension == ".dat" or file_extension == ".dir": - common_dict = {} - with shelve.open(str(out_file), "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: + msg = "Output file extension does not match input file extension" + raise RuntimeError(msg) + + Props.write_to(out_file, out_dict) + + elif file_extension == ".pkl": + out_dict = {} for channel in channel_files: with Path(channel).open("rb") as r: channel_dict = pkl.load(r) - fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel_files[0]).name) - if chmap is not None: - channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" - else: - channel_name = fkey.channel - if isinstance(channel_dict, dict) and "common" in list(channel_dict): - chan_common_dict = channel_dict.pop("common") - common_dict[channel_name] = chan_common_dict - shelf[channel_name] = channel_dict - if len(common_dict) > 0: - shelf["common"] = common_dict - - -elif file_extension == ".lh5": - if args.in_db: - db_dict = Props.read_from(args.in_db) - for channel in channel_files: - if Path(channel).suffix == file_extension: fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) if chmap is not None: channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" else: channel_name = fkey.channel - tb_in = lh5.read(f"{channel_name}", channel) - - lh5.write( - tb_in, - name=channel_name, - lh5_file=temp_output, - wo_mode="a", - ) - if args.in_db: - db_dict[channel_name] = replace_path( - db_dict[channel_name], channel, args.output + out_dict[channel_name] = channel_dict + + with Path(temp_output).open("wb") as w: + pkl.dump(out_dict, w, protocol=pkl.HIGHEST_PROTOCOL) + + Path(temp_output).rename(out_file) + + elif file_extension == ".dat" or file_extension == ".dir": + common_dict = {} + with shelve.open(str(out_file), "c", protocol=pkl.HIGHEST_PROTOCOL) as shelf: + for channel in channel_files: + with Path(channel).open("rb") as r: + channel_dict = pkl.load(r) + fkey = ChannelProcKey.get_filekey_from_pattern( + Path(channel_files[0]).name + ) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + channel_name = fkey.channel + if isinstance(channel_dict, dict) and "common" in 
list(channel_dict): + chan_common_dict = channel_dict.pop("common") + common_dict[channel_name] = chan_common_dict + shelf[channel_name] = channel_dict + if len(common_dict) > 0: + shelf["common"] = common_dict + + elif file_extension == ".lh5": + if args.in_db: + db_dict = Props.read_from(args.in_db) + for channel in channel_files: + if Path(channel).suffix == file_extension: + fkey = ChannelProcKey.get_filekey_from_pattern(Path(channel).name) + if chmap is not None: + channel_name = f"ch{chmap[fkey.channel].daq.rawid:07}" + else: + channel_name = fkey.channel + tb_in = lh5.read(f"{channel_name}", channel) + + lh5.write( + tb_in, + name=channel_name, + lh5_file=temp_output, + wo_mode="a", ) - else: - msg = "Output file extension does not match input file extension" - raise RuntimeError(msg) - if args.out_db: - Props.write_to(args.out_db, db_dict) + if args.in_db: + db_dict[channel_name] = replace_path( + db_dict[channel_name], channel, args.output + ) + else: + msg = "Output file extension does not match input file extension" + raise RuntimeError(msg) + if args.out_db: + Props.write_to(args.out_db, db_dict) - Path(temp_output).rename(out_file) + Path(temp_output).rename(out_file) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py new file mode 100644 index 0000000..1bea45d --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py @@ -0,0 +1,150 @@ +import argparse +import logging +import pickle as pkl +import time +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from lgdo import Array, Table +from pygama.pargen.dplms_ge_dict import dplms_ge_dict + +from ....log import build_log + + +def par_geds_dsp_dplms() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) + argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) + argparser.add_argument("--inplots", help="in_plot_path", type=str) + argparser.add_argument("--database", help="database", type=str, required=True) + + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) + argparser.add_argument("--lh5_path", help="lh5_path", type=str, required=True) + argparser.add_argument("--plot_path", help="plot_path", type=str) + + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_dplms"] + + log = build_log(config_dict, args.log) + + log = logging.getLogger(__name__) + sto = lh5.LH5Store() + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + configs = LegendMetadata(args.configs, lazy=True).on( + args.timestamp, system=args.datatype + ) + dsp_config = config_dict["inputs"]["proc_chain"][args.channel] 
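The parameter database read just below (db_dict) carries per-channel values that are substituted into the DSP processing chain, and the DPLMS routine both consumes and extends it. A minimal sketch of how such a dict is fed to a one-off DSP run, under the assumption that the chain looks these values up as db.<block>.<field>; file names and the etrap block are placeholders, not values from this patch:

import lgdo.lh5 as lh5
from dbetto.catalog import Props
from pygama.pargen.dsp_optimize import run_one_dsp

# assumed inputs: a raw waveform table and a processing-chain config (placeholder paths)
tb_data = lh5.read("ch1234567/raw", "example-raw.lh5")
dsp_config = Props.read_from("example-dsp-proc-chain.yaml")

# per-channel overrides picked up by the chain's db.etrap.* placeholders
db_dict = {"etrap": {"rise": "10*us", "flat": "3*us"}}

dsp_out = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0)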
+ + dplms_json = config_dict["inputs"]["dplms_pars"][args.channel] + dplms_dict = Props.read_from(dplms_json) + + db_dict = Props.read_from(args.database) + + if dplms_dict["run_dplms"] is True: + with Path(args.fft_raw_filelist).open() as f: + fft_files = sorted(f.read().splitlines()) + + t0 = time.time() + log.info("\nLoad fft data") + energies = sto.read(f"{channel}/raw/daqenergy", fft_files)[0] + idxs = np.where(energies.nda == 0)[0] + raw_fft = sto.read( + f"{channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs + )[0] + t1 = time.time() + log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") + + log.info("\nRunning event selection") + peaks_kev = np.array(dplms_dict["peaks_kev"]) + # kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] + + peaks_rounded = [int(peak) for peak in peaks_kev] + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0][ + "peak" + ].nda + ids = np.isin(peaks, peaks_rounded) + peaks = peaks[ids] + # idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] + + raw_cal = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] + log.info( + f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}" + ) + + if isinstance(dsp_config, (str, list)): + dsp_config = Props.read_from(dsp_config) + + if args.plot_path: + out_dict, plot_dict = dplms_ge_dict( + raw_fft, + raw_cal, + dsp_config, + db_dict, + dplms_dict, + display=1, + ) + if args.inplots: + with Path(args.inplots).open("rb") as r: + inplot_dict = pkl.load(r) + inplot_dict.update({"dplms": plot_dict}) + + else: + out_dict = dplms_ge_dict( + raw_fft, + raw_cal, + dsp_config, + db_dict, + dplms_dict, + ) + + coeffs = out_dict["dplms"].pop("coefficients") + dplms_pars = Table(col_dict={"coefficients": Array(coeffs)}) + out_dict["dplms"]["coefficients"] = ( + f"loadlh5('{args.lh5_path}', '{channel}/dplms/coefficients')" + ) + + log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") + else: + out_dict = {} + dplms_pars = Table(col_dict={"coefficients": Array([])}) + if args.inplots: + with Path(args.inplots).open("rb") as r: + inplot_dict = pkl.load(r) + else: + inplot_dict = {} + + db_dict.update(out_dict) + + Path(args.lh5_path).parent.mkdir(parents=True, exist_ok=True) + sto.write( + Table(col_dict={"dplms": dplms_pars}), + name=channel, + lh5_file=args.lh5_path, + wo_mode="overwrite", + ) + + Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.dsp_pars, db_dict) + + if args.plot_path: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as f: + pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py new file mode 100644 index 0000000..4b755c2 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py @@ -0,0 +1,398 @@ +import argparse +import pickle as pkl +import time +import warnings +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +import pygama.pargen.energy_optimisation as om # noqa: F401 +import sklearn.gaussian_process.kernels as ker +from dbetto import TextDB +from dbetto.catalog import Props +from dspeed.units import unit_registry as ureg +from legendmeta import LegendMetadata +from pygama.math.distributions import hpge_peak +from pygama.pargen.dsp_optimize import ( + BayesianOptimizer, + run_bayesian_optimisation, + 
run_one_dsp, +) + +from ....log import build_log + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) +warnings.filterwarnings(action="ignore", category=np.RankWarning) + + +def par_geds_dsp_eopt() -> None: + argparser = argparse.ArgumentParser() + + argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) + argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) + argparser.add_argument("--inplots", help="in_plot_path", type=str) + + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument( + "--final_dsp_pars", help="final_dsp_pars", type=str, required=True + ) + argparser.add_argument("--qbb_grid_path", help="qbb_grid_path", type=str) + argparser.add_argument("--plot_path", help="plot_path", type=str) + + argparser.add_argument( + "--plot_save_path", help="plot_save_path", type=str, required=False + ) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_eopt"] + + log = build_log(config_dict, args.log) + + sto = lh5.LH5Store() + t0 = time.time() + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + dsp_config = config_dict["inputs"]["processing_chain"][args.channel] + opt_json = config_dict["inputs"]["optimiser_config"][args.channel] + + opt_dict = Props.read_from(opt_json) + db_dict = Props.read_from(args.decay_const) + + if opt_dict.pop("run_eopt") is True: + peaks_kev = np.array(opt_dict["peaks"]) + kev_widths = [tuple(kev_width) for kev_width in opt_dict["kev_widths"]] + + kwarg_dicts_cusp = [] + kwarg_dicts_trap = [] + kwarg_dicts_zac = [] + for peak in peaks_kev: + peak_idx = np.where(peaks_kev == peak)[0][0] + kev_width = kev_widths[peak_idx] + + kwarg_dicts_cusp.append( + { + "parameter": "cuspEmax", + "func": hpge_peak, + "peak": peak, + "kev_width": kev_width, + "bin_width": 5, + } + ) + kwarg_dicts_zac.append( + { + "parameter": "zacEmax", + "func": hpge_peak, + "peak": peak, + "kev_width": kev_width, + "bin_width": 5, + } + ) + kwarg_dicts_trap.append( + { + "parameter": "trapEmax", + "func": hpge_peak, + "peak": peak, + "kev_width": kev_width, + "bin_width": 5, + } + ) + + peaks_rounded = [int(peak) for peak in peaks_kev] + peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0][ + "peak" + ].nda + ids = np.isin(peaks, peaks_rounded) + peaks = peaks[ids] + idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] + + tb_data = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] + + t1 = time.time() + log.info(f"Data Loaded in {(t1-t0)/60} minutes") + + if isinstance(dsp_config, (str, list)): + dsp_config = Props.read_from(dsp_config) + + dsp_config["outputs"] = ["tp_99", "tp_0_est", "dt_eff"] + + init_data = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0) + full_dt = (init_data["tp_99"].nda - init_data["tp_0_est"].nda)[idx_list[-1]] + flat_val = np.ceil(1.1 * np.nanpercentile(full_dt, 99) / 
100) / 10 + + if flat_val < 1.0: + flat_val = 1.0 + elif flat_val > 4: + flat_val = 4 + flat_val = f"{flat_val}*us" + + db_dict["cusp"] = {"flat": flat_val} + db_dict["zac"] = {"flat": flat_val} + db_dict["etrap"] = {"flat": flat_val} + + tb_data.add_column("dt_eff", init_data["dt_eff"]) + + dsp_config["processors"].pop("dt_eff") + + dsp_config["outputs"] = ["zacEmax", "cuspEmax", "trapEmax", "dt_eff"] + + kwarg_dict = [ + { + "peak_dicts": kwarg_dicts_cusp, + "ctc_param": "dt_eff", + "idx_list": idx_list, + "peaks_kev": peaks_kev, + }, + { + "peak_dicts": kwarg_dicts_zac, + "ctc_param": "dt_eff", + "idx_list": idx_list, + "peaks_kev": peaks_kev, + }, + { + "peak_dicts": kwarg_dicts_trap, + "ctc_param": "dt_eff", + "idx_list": idx_list, + "peaks_kev": peaks_kev, + }, + ] + + fom = eval(opt_dict["fom"]) + out_field = opt_dict["fom_field"] + out_err_field = opt_dict["fom_err_field"] + sample_x = np.array(opt_dict["initial_samples"]) + + results_cusp = [] + results_zac = [] + results_trap = [] + + sample_y_cusp = [] + sample_y_zac = [] + sample_y_trap = [] + + err_y_cusp = [] + err_y_zac = [] + err_y_trap = [] + + for i, x in enumerate(sample_x): + db_dict["cusp"]["sigma"] = f"{x[0]}*us" + db_dict["zac"]["sigma"] = f"{x[0]}*us" + db_dict["etrap"]["rise"] = f"{x[0]}*us" + + log.info(f"Initialising values {i+1} : {db_dict}") + + tb_out = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0) + + res = fom(tb_out, kwarg_dict[0]) + results_cusp.append(res) + sample_y_cusp.append(res[out_field]) + err_y_cusp.append(res[out_err_field]) + + res = fom(tb_out, kwarg_dict[1]) + results_zac.append(res) + sample_y_zac.append(res[out_field]) + err_y_zac.append(res[out_err_field]) + + res = fom(tb_out, kwarg_dict[2]) + results_trap.append(res) + sample_y_trap.append(res[out_field]) + err_y_trap.append(res[out_err_field]) + + log.info(f"{i+1} Finished") + + if np.isnan(sample_y_cusp).all(): + max_cusp = opt_dict["nan_default"] + else: + max_cusp = np.ceil(np.nanmax(sample_y_cusp) * 2) + if np.isnan(sample_y_zac).all(): + max_zac = opt_dict["nan_default"] + else: + max_zac = np.ceil(np.nanmax(sample_y_zac) * 2) + if np.isnan(sample_y_trap).all(): + max_trap = opt_dict["nan_default"] + else: + max_trap = np.ceil(np.nanmax(sample_y_trap) * 2) + + nan_vals = [max_cusp, max_zac, max_trap] + + for i in range(len(sample_x)): + if np.isnan(sample_y_cusp[i]): + results_cusp[i]["y_val"] = max_cusp + sample_y_cusp[i] = max_cusp + + if np.isnan(sample_y_zac[i]): + results_zac[i]["y_val"] = max_zac + sample_y_zac[i] = max_zac + + if np.isnan(sample_y_trap[i]): + results_trap[i]["y_val"] = max_trap + sample_y_trap[i] = max_trap + + kernel = ( + ker.ConstantKernel(2.0, constant_value_bounds="fixed") + + 1.0 * ker.RBF(1.0, length_scale_bounds=[0.5, 2.5]) + + ker.WhiteKernel(noise_level=0.1, noise_level_bounds=(1e-5, 1e1)) + ) + + lambda_param = 5 + sampling_rate = tb_data["waveform_presummed"]["dt"][0] + sampling_unit = ureg.Quantity( + tb_data["waveform_presummed"]["dt"].attrs["units"] + ) + waveform_sampling = sampling_rate * sampling_unit + + bopt_cusp = BayesianOptimizer( + acq_func=opt_dict["acq_func"], + batch_size=opt_dict["batch_size"], + kernel=kernel, + sampling_rate=waveform_sampling, + fom_value=out_field, + fom_error=out_err_field, + ) + bopt_cusp.lambda_param = lambda_param + bopt_cusp.add_dimension("cusp", "sigma", 0.5, 16, True, "us") + + bopt_zac = BayesianOptimizer( + acq_func=opt_dict["acq_func"], + batch_size=opt_dict["batch_size"], + kernel=kernel, + sampling_rate=waveform_sampling, + 
fom_value=out_field, + fom_error=out_err_field, + ) + bopt_zac.lambda_param = lambda_param + bopt_zac.add_dimension("zac", "sigma", 0.5, 16, True, "us") + + bopt_trap = BayesianOptimizer( + acq_func=opt_dict["acq_func"], + batch_size=opt_dict["batch_size"], + kernel=kernel, + sampling_rate=waveform_sampling, + fom_value=out_field, + fom_error=out_err_field, + ) + bopt_trap.lambda_param = lambda_param + bopt_trap.add_dimension("etrap", "rise", 1, 12, True, "us") + + bopt_cusp.add_initial_values( + x_init=sample_x, y_init=sample_y_cusp, yerr_init=err_y_cusp + ) + bopt_zac.add_initial_values( + x_init=sample_x, y_init=sample_y_zac, yerr_init=err_y_zac + ) + bopt_trap.add_initial_values( + x_init=sample_x, y_init=sample_y_trap, yerr_init=err_y_trap + ) + + best_idx = np.nanargmin(sample_y_cusp) + bopt_cusp.optimal_results = results_cusp[best_idx] + bopt_cusp.optimal_x = sample_x[best_idx] + + best_idx = np.nanargmin(sample_y_zac) + bopt_zac.optimal_results = results_zac[best_idx] + bopt_zac.optimal_x = sample_x[best_idx] + + best_idx = np.nanargmin(sample_y_trap) + bopt_trap.optimal_results = results_trap[best_idx] + bopt_trap.optimal_x = sample_x[best_idx] + + optimisers = [bopt_cusp, bopt_zac, bopt_trap] + + out_param_dict, out_results_list = run_bayesian_optimisation( + tb_data, + dsp_config, + [fom], + optimisers, + fom_kwargs=kwarg_dict, + db_dict=db_dict, + nan_val=nan_vals, + n_iter=opt_dict["n_iter"], + ) + + Props.add_to(db_dict, out_param_dict) + + # db_dict.update(out_param_dict) + + t2 = time.time() + log.info(f"Optimiser finished in {(t2-t1)/60} minutes") + + out_alpha_dict = {} + out_alpha_dict["cuspEmax_ctc"] = { + "expression": "cuspEmax*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, + } + + out_alpha_dict["cuspEftp_ctc"] = { + "expression": "cuspEftp*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, + } + + out_alpha_dict["zacEmax_ctc"] = { + "expression": "zacEmax*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, + } + + out_alpha_dict["zacEftp_ctc"] = { + "expression": "zacEftp*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, + } + + out_alpha_dict["trapEmax_ctc"] = { + "expression": "trapEmax*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, + } + + out_alpha_dict["trapEftp_ctc"] = { + "expression": "trapEftp*(1+dt_eff*a)", + "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, + } + if "ctc_params" in db_dict: + db_dict["ctc_params"].update(out_alpha_dict) + else: + db_dict.update({"ctc_params": out_alpha_dict}) + + Path(args.qbb_grid_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.qbb_grid_path).open("wb") as f: + pkl.dump(optimisers, f) + + else: + Path(args.qbb_grid_path).touch() + + Path(args.final_dsp_pars).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.final_dsp_pars, db_dict) + + if args.plot_path: + if args.inplots: + with Path(args.inplots).open("rb") as r: + plot_dict = pkl.load(r) + else: + plot_dict = {} + + plot_dict["trap_optimisation"] = { + "kernel_space": bopt_trap.plot(init_samples=sample_x), + "acq_space": bopt_trap.plot_acq(init_samples=sample_x), + } + + plot_dict["cusp_optimisation"] = { + "kernel_space": bopt_cusp.plot(init_samples=sample_x), + "acq_space": bopt_cusp.plot_acq(init_samples=sample_x), + } + + plot_dict["zac_optimisation"] = { + "kernel_space": 
bopt_zac.plot(init_samples=sample_x), + "acq_space": bopt_zac.plot_acq(init_samples=sample_x), + } + + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_path).open("wb") as w: + pkl.dump(plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py similarity index 98% rename from workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py rename to workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py index 2c01421..e9b1de6 100644 --- a/workflow/src/legenddataflow/scripts/pars_dsp_evtsel_geds.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py @@ -16,7 +16,7 @@ from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids from pygama.pargen.dsp_optimize import run_one_dsp -from ..log import build_log +from ....log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -80,7 +80,7 @@ def get_out_data( return out_tbl, len(np.where(final_mask)[0]) -if __name__ == "__main__": +def par_geds_dsp_evtsel() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) argparser.add_argument( @@ -168,10 +168,6 @@ def get_out_data( if lh5_path[-1] != "/": lh5_path += "/" - raw_fields = [ - field.replace(lh5_path, "") for field in lh5.ls(raw_files[0], lh5_path) - ] - tb = sto.read( lh5_path, raw_files, field_mask=["daqenergy", "t_sat_lo", "timestamp"] )[0] diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py new file mode 100644 index 0000000..691a0e8 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py @@ -0,0 +1,112 @@ +import argparse +import pickle as pkl +import time +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +import pygama.pargen.noise_optimization as pno +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes +from pygama.pargen.dsp_optimize import run_one_dsp + +from ....log import build_log + + +def par_geds_dsp_nopt() -> None: + sto = lh5.LH5Store() + + argparser = argparse.ArgumentParser() + argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) + argparser.add_argument("--database", help="database", type=str, required=True) + argparser.add_argument("--inplots", help="inplots", type=str) + + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) + argparser.add_argument("--plot_path", help="plot_path", type=str) + + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] + + log = build_log(config_dict, args.log) + + t0 = time.time() + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + 
channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + dsp_config = config_dict["inputs"]["processing_chain"][args.channel] + opt_json = config_dict["inputs"]["optimiser_config"][args.channel] + + opt_dict = Props.read_from(opt_json) + db_dict = Props.read_from(args.database) + + if opt_dict.pop("run_nopt") is True: + with Path(args.raw_filelist).open() as f: + files = f.read().splitlines() + + raw_files = sorted(files) + + energies = sto.read(f"{channel}/raw/daqenergy", raw_files)[0] + idxs = np.where(energies.nda == 0)[0] + tb_data = sto.read( + f"{channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs + )[0] + t1 = time.time() + log.info(f"Time to open raw files {t1-t0:.2f} s, n. baselines {len(tb_data)}") + + log.info(f"Select baselines {len(tb_data)}") + dsp_data = run_one_dsp(tb_data, dsp_config) + cut_dict = generate_cuts(dsp_data, cut_dict=opt_dict.pop("cut_pars")) + cut_idxs = get_cut_indexes(dsp_data, cut_dict) + tb_data = sto.read( + f"{channel}/raw", + raw_files, + n_rows=opt_dict.pop("n_events"), + idx=idxs[cut_idxs], + )[0] + log.info(f"... {len(tb_data)} baselines after cuts") + + if isinstance(dsp_config, (str, list)): + dsp_config = Props.read_from(dsp_config) + + if args.plot_path: + out_dict, plot_dict = pno.noise_optimization( + tb_data, dsp_config, db_dict.copy(), opt_dict, channel, display=1 + ) + else: + out_dict = pno.noise_optimization( + raw_files, dsp_config, db_dict.copy(), opt_dict, channel + ) + + t2 = time.time() + log.info(f"Optimiser finished in {(t2-t0)/60} minutes") + else: + out_dict = {} + plot_dict = {} + + if args.plot_path: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + if args.inplots: + with Path(args.inplots).open("rb") as r: + old_plot_dict = pkl.load(r) + plot_dict = dict(noise_optimisation=plot_dict, **old_plot_dict) + else: + plot_dict = {"noise_optimisation": plot_dict} + with Path(args.plot_path).open("wb") as f: + pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) + + Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.dsp_pars, dict(nopt_pars=out_dict, **db_dict)) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py new file mode 100644 index 0000000..d4a1e22 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py @@ -0,0 +1,26 @@ +import argparse +from pathlib import Path + +from dbetto.catalog import Props + + +def par_geds_dsp_svm() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument( + "--output_file", help="output par file", type=str, required=True + ) + argparser.add_argument( + "--input_file", help="input par file", type=str, required=True + ) + argparser.add_argument("--svm_file", help="svm file", required=True) + args = argparser.parse_args() + + par_data = Props.read_from(args.input_file) + + file = f"'$_/{Path(args.svm_file).name}'" + + par_data["svm"] = {"model_file": file} + + Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.output_file, par_data) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py new file mode 100644 index 0000000..162ccfa --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py @@ -0,0 +1,63 @@ +import argparse +import pickle as pkl +from pathlib import Path + +from dbetto import TextDB +from dbetto.catalog import 
Props +from lgdo import lh5 +from sklearn.svm import SVC + +from ....log import build_log + + +def par_geds_dsp_svm_build() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--log", help="log file", type=str) + argparser.add_argument("--configs", help="config file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + + argparser.add_argument( + "--output_file", help="output SVM file", type=str, required=True + ) + argparser.add_argument( + "--train_data", help="input data file", type=str, required=True + ) + argparser.add_argument( + "--train_hyperpars", help="input hyperparameter file", required=True + ) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] + + log = build_log(config_dict, args.log) + + # Load files + tb = lh5.read("ml_train/dsp", args.train_data) + log.debug("loaded data") + + hyperpars = Props.read_from(args.train_hyperpars) + + # Define training inputs + dwts_norm = tb["dwt_norm"].nda + labels = tb["dc_label"].nda + + log.debug("training model") + # Initialize and train SVM + svm = SVC( + random_state=int(hyperpars["random_state"]), + kernel=hyperpars["kernel"], + decision_function_shape=hyperpars["decision_function_shape"], + class_weight=hyperpars["class_weight"], + C=float(hyperpars["C"]), + gamma=float(hyperpars["gamma"]), + ) + + svm.fit(dwts_norm, labels) + log.debug("trained model") + + # Save trained model with pickle + with Path(args.output_file).open("wb") as svm_file: + pkl.dump(svm, svm_file, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py new file mode 100644 index 0000000..4d493a1 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py @@ -0,0 +1,146 @@ +import argparse +import pickle as pkl +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids +from pygama.pargen.dsp_optimize import run_one_dsp +from pygama.pargen.extract_tau import ExtractTau + +from ....log import build_log + + +def par_geds_dsp_tau() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--configs", help="configs path", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--plot_path", help="plot path", type=str, required=False) + argparser.add_argument("--output_file", help="output file", type=str, required=True) + + argparser.add_argument( + "--pulser_file", help="pulser file", type=str, required=False + ) + + argparser.add_argument("--raw_files", help="input files", nargs="*", type=str) + argparser.add_argument( + "--tcm_files", help="tcm_files", nargs="*", type=str, required=False + ) + args = argparser.parse_args() + + sto = lh5.LH5Store() + + configs = TextDB(args.configs, 
lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] + + log = build_log(config_dict, args.log) + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + channel_dict = config_dict["inputs"]["processing_chain"][args.channel] + kwarg_dict = config_dict["inputs"]["tau_config"][args.channel] + + kwarg_dict = Props.read_from(kwarg_dict) + + if kwarg_dict["run_tau"] is True: + dsp_config = Props.read_from(channel_dict) + kwarg_dict.pop("run_tau") + if ( + isinstance(args.raw_files, list) + and args.raw_files[0].split(".")[-1] == "filelist" + ): + input_file = args.raw_files[0] + with Path(input_file).open() as f: + input_file = f.read().splitlines() + else: + input_file = args.raw_files + + if args.pulser_file: + pulser_dict = Props.read_from(args.pulser_file) + mask = np.array(pulser_dict["mask"]) + + elif args.tcm_filelist: + # get pulser mask from tcm files + with Path(args.tcm_filelist).open() as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] + ) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + + data = sto.read( + f"{channel}/raw", + input_file, + field_mask=["daqenergy", "timestamp", "t_sat_lo"], + )[0].view_as("pd") + threshold = kwarg_dict.pop("threshold") + + discharges = data["t_sat_lo"] > 0 + discharge_timestamps = np.where(data["timestamp"][discharges])[0] + is_recovering = np.full(len(data), False, dtype=bool) + for tstamp in discharge_timestamps: + is_recovering = is_recovering | np.where( + ( + ((data["timestamp"] - tstamp) < 0.01) + & ((data["timestamp"] - tstamp) > 0) + ), + True, + False, + ) + cuts = np.where( + (data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering) + )[0] + + tb_data = sto.read( + f"{channel}/raw", + input_file, + idx=cuts, + n_rows=kwarg_dict.pop("n_events"), + )[0] + + tb_out = run_one_dsp(tb_data, dsp_config) + log.debug("Processed Data") + cut_parameters = kwarg_dict.get("cut_parameters", None) + if cut_parameters is not None: + idxs = get_cut_indexes(tb_out, cut_parameters=cut_parameters) + log.debug("Applied cuts") + log.debug(f"{len(idxs)} events passed cuts") + else: + idxs = np.full(len(tb_out), True, dtype=bool) + + tau = ExtractTau(dsp_config, kwarg_dict["wf_field"]) + slopes = tb_out["tail_slope"].nda + log.debug("Calculating pz constant") + + tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) + + if args.plot_path: + Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) + + plot_dict = tau.plot_waveforms_after_correction( + tb_data, "wf_pz", norm_param=kwarg_dict.get("norm_param", "pz_mean") + ) + plot_dict.update(tau.plot_slopes(slopes[idxs])) + + with Path(args.plot_path).open("wb") as f: + pkl.dump({"tau": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) + out_dict = tau.output_dict + else: + out_dict = {} + + Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.output_file, out_dict) diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py b/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py new file mode 100644 index 0000000..2b6c6e1 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py @@ -0,0 +1,262 @@ +from __future__ import annotations + +import argparse +import pickle as pkl 
+import warnings +from pathlib import Path + +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from pygama.pargen.AoE_cal import * # noqa: F403 +from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak +from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from pygama.pargen.utils import load_data + +from ....convert_np import convert_dict_np_to_float +from ....log import build_log + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + + +def get_results_dict(aoe_class): + return { + "cal_energy_param": aoe_class.cal_energy_param, + "dt_param": aoe_class.dt_param, + "rt_correction": aoe_class.dt_corr, + "1000-1300keV": aoe_class.timecorr_df.to_dict("index"), + "correction_fit_results": aoe_class.energy_corr_res_dict, + "low_cut": aoe_class.low_cut_val, + "high_cut": aoe_class.high_cut_val, + "low_side_sfs": aoe_class.low_side_sfs.to_dict("index"), + "2_side_sfs": aoe_class.two_side_sfs.to_dict("index"), + } + + +def fill_plot_dict(aoe_class, data, plot_options, plot_dict=None): + if plot_dict is not None: + for key, item in plot_options.items(): + if item["options"] is not None: + plot_dict[key] = item["function"](aoe_class, data, **item["options"]) + else: + plot_dict[key] = item["function"](aoe_class, data) + else: + plot_dict = {} + return plot_dict + + +def par_geds_hit_aoe() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("files", help="files", nargs="*", type=str) + argparser.add_argument( + "--pulser_file", help="pulser_file", type=str, required=False + ) + argparser.add_argument( + "--tcm_filelist", help="tcm_filelist", type=str, required=False + ) + + argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) + argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) + argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) + + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) + argparser.add_argument("--hit_pars", help="hit_pars", type=str) + argparser.add_argument("--aoe_results", help="aoe_results", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_hit_aoecal"] + + log = build_log(config_dict, args.log) + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + channel_dict = config_dict["inputs"]["aoecal_config"][args.channel] + kwarg_dict = Props.read_from(channel_dict) + + ecal_dict = Props.read_from(args.ecal_file) + cal_dict = ecal_dict["pars"] + eres_dict = ecal_dict["results"]["ecal"] + + with Path(args.eres_file).open("rb") as o: + object_dict = pkl.load(o) + + if kwarg_dict["run_aoe"] is True: + kwarg_dict.pop("run_aoe") + + pdf = 
eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else aoe_peak
+
+        sigma_func = (
+            eval(kwarg_dict.pop("sigma_func"))
+            if "sigma_func" in kwarg_dict
+            else SigmaFit
+        )
+
+        mean_func = (
+            eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else Pol1
+        )
+
+        if "plot_options" in kwarg_dict:
+            for field, item in kwarg_dict["plot_options"].items():
+                kwarg_dict["plot_options"][field]["function"] = eval(item["function"])
+
+        with Path(args.files[0]).open() as f:
+            files = f.read().splitlines()
+        files = sorted(files)
+
+        try:
+            eres = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy()
+
+            def eres_func(x):
+                return eval(eres["expression"], dict(x=x, **eres["parameters"]))
+
+        except KeyError:
+
+            def eres_func(x):
+                return x * np.nan
+
+        params = [
+            kwarg_dict["current_param"],
+            "tp_0_est",
+            "tp_99",
+            kwarg_dict["energy_param"],
+            kwarg_dict["cal_energy_param"],
+            kwarg_dict["cut_field"],
+            "timestamp",
+        ]
+
+        if "dt_param" in kwarg_dict:
+            params.append(kwarg_dict["dt_param"])
+        else:
+            params.append("dt_eff")
+
+        if "dt_cut" in kwarg_dict and kwarg_dict["dt_cut"] is not None:
+            cal_dict.update(kwarg_dict["dt_cut"]["cut"])
+            params.append(kwarg_dict["dt_cut"]["out_param"])
+
+        # load data in
+        data, threshold_mask = load_data(
+            files,
+            f"{channel}/dsp",
+            cal_dict,
+            params=params,
+            threshold=kwarg_dict.pop("threshold"),
+            return_selection_mask=True,
+        )
+
+        if args.pulser_file:
+            pulser_dict = Props.read_from(args.pulser_file)
+            mask = np.array(pulser_dict["mask"])
+            if "pulser_multiplicity_threshold" in kwarg_dict:
+                kwarg_dict.pop("pulser_multiplicity_threshold")
+
+        elif args.tcm_filelist:
+            # get pulser mask from tcm files
+            with Path(args.tcm_filelist).open() as f:
+                tcm_files = f.read().splitlines()
+            tcm_files = sorted(np.unique(tcm_files))
+            ids, mask = get_tcm_pulser_ids(
+                tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold")
+            )
+        else:
+            msg = "No pulser file or tcm filelist provided"
+            raise ValueError(msg)
+
+        data["is_pulser"] = mask[threshold_mask]
+
+        data["AoE_Uncorr"] = (
+            data[kwarg_dict["current_param"]] / data[kwarg_dict["energy_param"]]
+        )
+        aoe = CalAoE(
+            cal_dicts=cal_dict,
+            cal_energy_param=kwarg_dict["cal_energy_param"],
+            eres_func=eres_func,
+            pdf=pdf,
+            mean_func=mean_func,
+            sigma_func=sigma_func,
+            selection_string=f"{kwarg_dict.pop('cut_field')}&(~is_pulser)",
+            dt_corr=kwarg_dict.get("dt_corr", False),
+            dep_correct=kwarg_dict.get("dep_correct", False),
+            dt_cut=kwarg_dict.get("dt_cut", None),
+            dt_param=kwarg_dict.get("dt_param", 3),
+            high_cut_val=kwarg_dict.get("high_cut_val", 3),
+            compt_bands_width=kwarg_dict.get("compt_bands_width", 20),
+            debug_mode=args.debug | kwarg_dict.get("debug_mode", False),
+        )
+        aoe.update_cal_dicts(
+            {
+                "AoE_Uncorr": {
+                    "expression": f"{kwarg_dict['current_param']}/{kwarg_dict['energy_param']}",
+                    "parameters": {},
+                }
+            }
+        )
+        aoe.calibrate(data, "AoE_Uncorr")
+
+        log.info("Calibrated A/E")
+        out_dict = get_results_dict(aoe)
+        plot_dict = fill_plot_dict(aoe, data, kwarg_dict.get("plot_options", None))
+
+        aoe.pdf = aoe.pdf.name
+
+        # need to change eres func as can't pickle lambdas
+        try:
+            aoe.eres_func = eres_dict[kwarg_dict["cal_energy_param"]][
+                "eres_linear"
+            ].copy()
+        except KeyError:
+            aoe.eres_func = {}
+    else:
+        out_dict = {}
+        plot_dict = {}
+        aoe = None
+
+    if args.plot_file:
+        common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None
+        if args.inplots:
+            with Path(args.inplots).open("rb") as r:
+                out_plot_dict = pkl.load(r)
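+            # fold the freshly produced A/E plots into the plot dictionary carried over from earlier steps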
out_plot_dict.update({"aoe": plot_dict}) + else: + out_plot_dict = {"aoe": plot_dict} + + if "common" in list(out_plot_dict) and common_dict is not None: + out_plot_dict["common"].update(common_dict) + elif common_dict is not None: + out_plot_dict["common"] = common_dict + + Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) + with Path(args.plot_file).open("wb") as w: + pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) + + Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) + results_dict = dict(**ecal_dict["results"], aoe=out_dict) + final_hit_dict = { + "pars": {"operations": cal_dict}, + "results": results_dict, + } + + final_hit_dict = convert_dict_np_to_float(final_hit_dict) + + Props.write_to(args.hit_pars, final_hit_dict) + + Path(args.aoe_results).parent.mkdir(parents=True, exist_ok=True) + final_object_dict = dict( + **object_dict, + aoe=aoe, + ) + with Path(args.aoe_results).open("wb") as w: + pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_hit_ecal.py b/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py similarity index 99% rename from workflow/src/legenddataflow/scripts/pars_hit_ecal.py rename to workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py index 725fc84..c67e304 100644 --- a/workflow/src/legenddataflow/scripts/pars_hit_ecal.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py @@ -2,7 +2,6 @@ import argparse import copy -import logging import pickle as pkl import warnings from datetime import datetime @@ -24,10 +23,9 @@ from pygama.pargen.utils import load_data from scipy.stats import binned_statistic -from ..convert_np import convert_dict_np_to_float -from ..log import build_log +from ....convert_np import convert_dict_np_to_float +from ....log import build_log -log = logging.getLogger(__name__) mpl.use("agg") sto = lh5.LH5Store() @@ -437,7 +435,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): } -if __name__ == "__main__": +def par_geds_hit_ecal() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="filelist", nargs="*", type=str) argparser.add_argument( @@ -478,7 +476,7 @@ def get_results_dict(ecal_class, data, cal_energy_param, selection_string): msg = "invalid tier" raise ValueError(msg) - log = build_log(config_dict, args.log) + build_log(config_dict, args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp) diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py b/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py new file mode 100644 index 0000000..357fe33 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py @@ -0,0 +1,230 @@ +from __future__ import annotations + +import argparse +import pickle as pkl +import warnings +from pathlib import Path + +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from pygama.math.distributions import gaussian +from pygama.pargen.AoE_cal import * # noqa: F403 +from pygama.pargen.data_cleaning import get_tcm_pulser_ids +from pygama.pargen.lq_cal import * # noqa: F403 +from pygama.pargen.lq_cal import LQCal +from pygama.pargen.utils import load_data + +from ....convert_np import convert_dict_np_to_float +from ....log import build_log + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + + +def get_results_dict(lq_class): + return { + "cal_energy_param": lq_class.cal_energy_param, + 
"DEP_means": lq_class.timecorr_df.to_dict("index"), + "rt_correction": lq_class.dt_fit_pars, + "cut_fit_pars": lq_class.cut_fit_pars.to_dict(), + "cut_value": lq_class.cut_val, + "sfs": lq_class.low_side_sf.to_dict("index"), + } + + +def fill_plot_dict(lq_class, data, plot_options, plot_dict=None): + if plot_dict is not None: + for key, item in plot_options.items(): + if item["options"] is not None: + plot_dict[key] = item["function"](lq_class, data, **item["options"]) + else: + plot_dict[key] = item["function"](lq_class, data) + else: + plot_dict = {} + return plot_dict + + +def par_geds_hit_lq() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("files", help="files", nargs="*", type=str) + argparser.add_argument( + "--pulser_file", help="pulser_file", type=str, required=False + ) + argparser.add_argument( + "--tcm_filelist", help="tcm_filelist", type=str, required=False + ) + + argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) + argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) + argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) + + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) + argparser.add_argument("--hit_pars", help="hit_pars", type=str) + argparser.add_argument("--lq_results", help="lq_results", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_hit_lqcal"] + + log = build_log(config_dict, args.log) + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + + channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] + kwarg_dict = Props.read_from(channel_dict) + + ecal_dict = Props.read_from(args.ecal_file) + cal_dict = ecal_dict["pars"]["operations"] + eres_dict = ecal_dict["results"]["ecal"] + + with Path(args.eres_file).open("rb") as o: + object_dict = pkl.load(o) + + if kwarg_dict["run_lq"] is True: + kwarg_dict.pop("run_lq") + + cdf = eval(kwarg_dict.pop("cdf")) if "cdf" in kwarg_dict else gaussian + + if "plot_options" in kwarg_dict: + for field, item in kwarg_dict["plot_options"].items(): + kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) + + with Path(args.files[0]).open() as f: + files = f.read().splitlines() + files = sorted(files) + + try: + eres = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy() + + def eres_func(x): + return eval(eres["expression"], dict(x=x, **eres["parameters"])) + + except KeyError: + + def eres_func(x): + return x * np.nan + + params = [ + "lq80", + "dt_eff", + kwarg_dict["energy_param"], + kwarg_dict["cal_energy_param"], + kwarg_dict["cut_field"], + ] + + # load data in + data, threshold_mask = load_data( + files, + f"{channel}/dsp", + cal_dict, + params=params, + 
threshold=kwarg_dict.pop("threshold"),
+            return_selection_mask=True,
+        )
+
+        if args.pulser_file:
+            pulser_dict = Props.read_from(args.pulser_file)
+            mask = np.array(pulser_dict["mask"])
+            if "pulser_multiplicity_threshold" in kwarg_dict:
+                kwarg_dict.pop("pulser_multiplicity_threshold")
+
+        elif args.tcm_filelist:
+            # get pulser mask from tcm files
+            with Path(args.tcm_filelist).open() as f:
+                tcm_files = f.read().splitlines()
+            tcm_files = sorted(np.unique(tcm_files))
+            ids, mask = get_tcm_pulser_ids(
+                tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold")
+            )
+        else:
+            msg = "No pulser file or tcm filelist provided"
+            raise ValueError(msg)
+
+        data["is_pulser"] = mask[threshold_mask]
+
+        lq = LQCal(
+            cal_dict,
+            kwarg_dict["cal_energy_param"],
+            kwarg_dict["dt_param"],
+            eres_func,
+            cdf,
+            selection_string=f"{kwarg_dict.pop('cut_field')}&(~is_pulser)",
+            debug_mode=args.debug | kwarg_dict.get("debug_mode", False),
+        )
+
+        data["LQ_Ecorr"] = np.divide(data["lq80"], data[kwarg_dict["energy_param"]])
+
+        lq.update_cal_dicts(
+            {
+                "LQ_Ecorr": {
+                    "expression": f"lq80/{kwarg_dict['energy_param']}",
+                    "parameters": {},
+                }
+            }
+        )
+
+        lq.calibrate(data, "LQ_Ecorr")
+        log.info("Calibrated LQ")
+
+        out_dict = get_results_dict(lq)
+        plot_dict = fill_plot_dict(lq, data, kwarg_dict.get("plot_options", None))
+
+        # need to change eres func as can't pickle lambdas
+        try:
+            lq.eres_func = eres_dict[kwarg_dict["cal_energy_param"]][
+                "eres_linear"
+            ].copy()
+        except KeyError:
+            lq.eres_func = {}
+    else:
+        out_dict = {}
+        plot_dict = {}
+        lq = None
+
+    if args.plot_file:
+        common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None
+        if args.inplots:
+            with Path(args.inplots).open("rb") as r:
+                out_plot_dict = pkl.load(r)
+            out_plot_dict.update({"lq": plot_dict})
+        else:
+            out_plot_dict = {"lq": plot_dict}
+
+        if "common" in list(out_plot_dict) and common_dict is not None:
+            out_plot_dict["common"].update(common_dict)
+        elif common_dict is not None:
+            out_plot_dict["common"] = common_dict
+
+        Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True)
+        with Path(args.plot_file).open("wb") as w:
+            pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL)
+
+    final_hit_dict = convert_dict_np_to_float(
+        {
+            "pars": {"operations": cal_dict},
+            "results": dict(**eres_dict, lq=out_dict),
+        }
+    )
+    Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True)
+    Props.write_to(args.hit_pars, final_hit_dict)
+
+    final_object_dict = dict(
+        **object_dict,
+        lq=lq,
+    )
+    Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True)
+    with Path(args.lq_results).open("wb") as w:
+        pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL)
diff --git a/workflow/src/legenddataflow/scripts/pars_hit_qc.py b/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py
similarity index 98%
rename from workflow/src/legenddataflow/scripts/pars_hit_qc.py
rename to workflow/src/legenddataflow/scripts/par/geds/hit/qc.py
index 5e6a378..acc1a32 100644
--- a/workflow/src/legenddataflow/scripts/pars_hit_qc.py
+++ b/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py
@@ -2,7 +2,6 @@
 import argparse
 import json
-import logging
 import pickle as pkl
 import re
 import warnings
@@ -20,15 +19,13 @@
 )
 from pygama.pargen.utils import load_data
 
-from ..convert_np import convert_dict_np_to_float
-from ..log import build_log
-
-log = logging.getLogger(__name__)
+from ....convert_np import convert_dict_np_to_float
+from ....log import build_log
 
 warnings.filterwarnings(action="ignore",
category=RuntimeWarning) -if __name__ == "__main__": +def par_geds_hit_qc() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py b/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py similarity index 84% rename from workflow/src/legenddataflow/scripts/pars_pht_aoecal.py rename to workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py index 8aad849..12c70f8 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_aoecal.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py @@ -2,12 +2,10 @@ import argparse import copy -import logging import pickle as pkl import re import warnings from pathlib import Path -from typing import Callable import numpy as np import pandas as pd @@ -19,10 +17,9 @@ from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data -from ..FileKey import ChannelProcKey, ProcessingFileKey -from ..log import build_log +from ....FileKey import ChannelProcKey, ProcessingFileKey +from ....log import build_log -log = logging.getLogger(__name__) warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -77,62 +74,6 @@ def fill_plot_dict(aoe_class, data, plot_options, plot_dict=None): return plot_dict -def aoe_calibration( - data: pd.Dataframe, - cal_dicts: dict, - current_param: str, - energy_param: str, - cal_energy_param: str, - eres_func: Callable, - pdf: Callable = aoe_peak, - selection_string: str = "", - dt_corr: bool = False, - dep_correct: bool = False, - dt_cut: dict | None = None, - high_cut_val: int = 3, - mean_func: Callable = Pol1, - sigma_func: Callable = SigmaFit, - # dep_acc: float = 0.9, - dt_param: str = "dt_eff", - comptBands_width: int = 20, - plot_options: dict | None = None, - debug_mode: bool = False, -): - data["AoE_Uncorr"] = data[current_param] / data[energy_param] - aoe = CalAoE( - cal_dicts=cal_dicts, - cal_energy_param=cal_energy_param, - eres_func=eres_func, - pdf=pdf, - selection_string=selection_string, - dt_corr=dt_corr, - dep_correct=dep_correct, - dt_cut=dt_cut, - dt_param=dt_param, - high_cut_val=high_cut_val, - mean_func=mean_func, - sigma_func=sigma_func, - compt_bands_width=comptBands_width, - debug_mode=debug_mode | args.debug, - ) - aoe.update_cal_dicts( - { - "AoE_Uncorr": { - "expression": f"{current_param}/{energy_param}", - "parameters": {}, - } - } - ) - aoe.calibrate(data, "AoE_Uncorr") - log.info("Calibrated A/E") - return ( - cal_dicts, - get_results_dict(aoe), - fill_plot_dict(aoe, data, plot_options), - aoe, - ) - - def run_aoe_calibration( data, cal_dicts, @@ -143,6 +84,7 @@ def run_aoe_calibration( configs, channel, datatype, + debug_mode=False, # gen_plots=True, ): configs = LegendMetadata(path=configs) @@ -211,28 +153,50 @@ def eres_func(x): def eres_func(x): return x * np.nan - cal_dicts, out_dict, aoe_plot_dict, aoe_obj = aoe_calibration( - data, - selection_string=f"{kwarg_dict.pop('final_cut_field')}&(~is_pulser)", + data["AoE_Uncorr"] = ( + data[kwarg_dict["current_param"]] / data[kwarg_dict["energy_param"]] + ) + aoe = CalAoE( cal_dicts=cal_dicts, + cal_energy_param=kwarg_dict["cal_energy_param"], eres_func=eres_func, pdf=pdf, mean_func=mean_func, sigma_func=sigma_func, - **kwarg_dict, + selection_string=f"{kwarg_dict.pop('cut_field')}&(~is_pulser)", + dt_corr=kwarg_dict.get("dt_corr", False), + dep_correct=kwarg_dict.get("dep_correct", 
False),
+            dt_cut=kwarg_dict.get("dt_cut", None),
+            dt_param=kwarg_dict.get("dt_param", 3),
+            high_cut_val=kwarg_dict.get("high_cut_val", 3),
+            compt_bands_width=kwarg_dict.get("compt_bands_width", 20),
+            debug_mode=debug_mode | kwarg_dict.get("debug_mode", False),
         )
-        aoe_obj.pdf = aoe_obj.pdf.name
+        aoe.update_cal_dicts(
+            {
+                "AoE_Uncorr": {
+                    "expression": f"{kwarg_dict['current_param']}/{kwarg_dict['energy_param']}",
+                    "parameters": {},
+                }
+            }
+        )
+        aoe.calibrate(data, "AoE_Uncorr")
+
+        out_dict = get_results_dict(aoe)
+        aoe_plot_dict = fill_plot_dict(aoe, data, kwarg_dict.get("plot_options", None))
+
+        aoe.pdf = aoe.pdf.name
         # need to change eres func as can't pickle lambdas
         try:
-            aoe_obj.eres_func = results_dicts[next(iter(results_dicts))][
-                "partition_ecal"
-            ][kwarg_dict["cal_energy_param"]]["eres_linear"]
+            aoe.eres_func = results_dicts[next(iter(results_dicts))]["partition_ecal"][
+                kwarg_dict["cal_energy_param"]
+            ]["eres_linear"]
         except KeyError:
-            aoe_obj.eres_func = {}
+            aoe.eres_func = {}
     else:
         out_dict = {tstamp: None for tstamp in cal_dicts}
         aoe_plot_dict = {}
-        aoe_obj = None
+        aoe = None
 
     out_result_dicts = {}
     for tstamp, result_dict in results_dicts.items():
@@ -240,7 +204,7 @@ def eres_func(x):
 
     out_object_dicts = {}
     for tstamp, object_dict in object_dicts.items():
-        out_object_dicts[tstamp] = dict(**object_dict, aoe=aoe_obj)
+        out_object_dicts[tstamp] = dict(**object_dict, aoe=aoe)
 
     common_dict = (
         aoe_plot_dict.pop("common") if "common" in list(aoe_plot_dict) else None
@@ -257,7 +221,7 @@ def eres_func(x):
     return cal_dicts, out_result_dicts, out_object_dicts, out_plot_dicts
 
 
-if __name__ == "__main__":
+def par_geds_pht_aoe() -> None:
     argparser = argparse.ArgumentParser()
     argparser.add_argument(
         "--input_files", help="files", type=str, nargs="*", required=True
@@ -298,7 +262,7 @@ def eres_func(x):
     configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype)
     config_dict = configs["snakemake_rules"]["pars_pht_aoecal"]
 
-    log = build_log(config_dict, args.log)
+    build_log(config_dict, args.log)
 
     meta = LegendMetadata(path=args.metadata)
     chmap = meta.channelmap(args.timestamp, system=args.datatype)
@@ -424,6 +388,7 @@ def eres_func(x):
         args.configs,
         args.channel,
         args.datatype,
+        debug_mode=args.debug,
         # gen_plots=bool(args.plot_file),
     )
 
@@ -436,7 +401,7 @@ def eres_func(x):
     for out in sorted(args.hit_pars):
         fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name)
         final_hit_dict = {
-            "pars": cal_dict[fk.timestamp],
+            "pars": cal_dicts[fk.timestamp],
            "results": results_dicts[fk.timestamp],
         }
         Path(out).parent.mkdir(parents=True, exist_ok=True)
@@ -446,4 +411,4 @@ def eres_func(x):
         fk = ChannelProcKey.get_filekey_from_pattern(Path(out).name)
         Path(out).parent.mkdir(parents=True, exist_ok=True)
         with Path(out).open("wb") as w:
-            pkl.dump(object_dict[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL)
+            pkl.dump(object_dicts[fk.timestamp], w, protocol=pkl.HIGHEST_PROTOCOL)
diff --git a/workflow/src/legenddataflow/scripts/pars_pht_partcal.py b/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py
similarity index 99%
rename from workflow/src/legenddataflow/scripts/pars_pht_partcal.py
rename to workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py
index bd2d93f..560a063 100644
--- a/workflow/src/legenddataflow/scripts/pars_pht_partcal.py
+++ b/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py
@@ -19,8 +19,8 @@
 from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration
 from pygama.pargen.utils import load_data
 
-from
..FileKey import ChannelProcKey, ProcessingFileKey -from ..log import build_log +from ....FileKey import ChannelProcKey, ProcessingFileKey +from ....log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_fast.py b/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py similarity index 95% rename from workflow/src/legenddataflow/scripts/pars_pht_fast.py rename to workflow/src/legenddataflow/scripts/par/geds/pht/fast.py index 6dda1b7..0faa42d 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_fast.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py @@ -11,14 +11,14 @@ from dbetto import TextDB from dbetto.catalog import Props from legendmeta import LegendMetadata -from pars_pht_aoecal import run_aoe_calibration -from pars_pht_lqcal import run_lq_calibration -from pars_pht_partcal import calibrate_partition from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data +from workflow.src.legenddataflow.scripts.par.geds.pht.aoe import run_aoe_calibration +from workflow.src.legenddataflow.scripts.par.geds.pht.lq import run_lq_calibration +from workflow.src.legenddataflow.scripts.par.geds.pht.partcal import calibrate_partition -from ..FileKey import ChannelProcKey, ProcessingFileKey -from ..log import build_log +from ....FileKey import ChannelProcKey, ProcessingFileKey +from ....log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -42,7 +42,7 @@ def run_splitter(files): return run_files -if __name__ == "__main__": +def par_geds_pht_fast() -> None: argparser = argparse.ArgumentParser() argparser.add_argument( "--input_files", help="files", type=str, nargs="*", required=True @@ -83,7 +83,7 @@ def run_splitter(files): configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) config_dict = configs["snakemake_rules"] - log = build_log(config_dict["pars_pht_partcal"], args.log) + build_log(config_dict["pars_pht_partcal"], args.log) meta = LegendMetadata(path=args.metadata) chmap = meta.channelmap(args.timestamp, system=args.datatype) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_lqcal.py b/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py similarity index 100% rename from workflow/src/legenddataflow/scripts/pars_pht_lqcal.py rename to workflow/src/legenddataflow/scripts/par/geds/pht/lq.py diff --git a/workflow/src/legenddataflow/scripts/pars_pht_qc.py b/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py similarity index 98% rename from workflow/src/legenddataflow/scripts/pars_pht_qc.py rename to workflow/src/legenddataflow/scripts/par/geds/pht/qc.py index feee4e5..af6dc95 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_qc.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py @@ -2,7 +2,6 @@ import argparse import json -import logging import pickle as pkl import re import warnings @@ -20,10 +19,8 @@ ) from pygama.pargen.utils import load_data -from ..convert_np import convert_dict_np_to_float -from ..log import build_log - -log = logging.getLogger(__name__) +from ....convert_np import convert_dict_np_to_float +from ....log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py 
b/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py similarity index 97% rename from workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py rename to workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py index 71167df..38f5e20 100644 --- a/workflow/src/legenddataflow/scripts/pars_pht_qc_phy.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py @@ -2,7 +2,6 @@ import argparse import json -import logging import pickle as pkl import re import warnings @@ -19,10 +18,8 @@ get_keys, ) -from ..convert_np import convert_dict_np_to_float -from ..log import build_log - -log = logging.getLogger(__name__) +from ....convert_np import convert_dict_np_to_float +from ....log import build_log warnings.filterwarnings(action="ignore", category=RuntimeWarning) diff --git a/workflow/src/legenddataflow/scripts/par/geds/psp/average.py b/workflow/src/legenddataflow/scripts/par/geds/psp/average.py new file mode 100644 index 0000000..65508a2 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/psp/average.py @@ -0,0 +1,160 @@ +import argparse +import pickle as pkl +from datetime import datetime +from pathlib import Path + +import matplotlib as mpl +import matplotlib.dates as mdates +import matplotlib.pyplot as plt +import numpy as np +from dbetto.catalog import Props +from legendmeta import LegendMetadata + +from ....FileKey import ChannelProcKey + +mpl.use("Agg") + + +def par_geds_psp_average() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument( + "--input", help="input files", nargs="*", type=str, required=True + ) + argparser.add_argument( + "--output", help="output file", nargs="*", type=str, required=True + ) + argparser.add_argument( + "--in_plots", help="input plot files", nargs="*", type=str, required=False + ) + argparser.add_argument( + "--out_plots", help="output plot files", nargs="*", type=str, required=False + ) + argparser.add_argument( + "--in_obj", help="input object files", nargs="*", type=str, required=False + ) + argparser.add_argument( + "--out_obj", help="output object files", nargs="*", type=str, required=False + ) + + argparser.add_argument("--log", help="log_file", type=str) + argparser.add_argument("--configs", help="configs", type=str, required=True) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + args = argparser.parse_args() + + configs = LegendMetadata(args.configs, lazy=True).on( + args.timestamp, system=args.datatype + ) + merge_config = Props.read_from( + configs["snakemake_rules"]["pars_psp"]["inputs"]["psp_config"][args.channel] + ) + + ave_fields = merge_config["average_fields"] + + # partitions could be different for different channels - do separately for each channel + in_dicts = {} + for file in args.input: + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp + in_dicts[tstamp] = Props.read_from(file) + + plot_dict = {} + for field in ave_fields: + keys = field.split(".") + vals = [] + for _tstamp, tstamp_dict in in_dicts.items(): + val = tstamp_dict.copy() + for key in keys: + val = val[key] + vals.append(val) + if "dsp" in tstamp_dict: + tmp_dict = tstamp_dict["dsp"] + else: + tmp_dict = {} + tstamp_dict["dsp"] = tmp_dict + for i, key in enumerate(keys): + if i == len(keys) - 1: + tmp_dict[key] = val + else: + if key in tmp_dict: + tmp_dict = tmp_dict[key] + else: + tmp_dict[key] = {} + 
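+                        # descend into the newly created level before handling the next key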
tmp_dict = tmp_dict[key] + if isinstance(vals[0], str): + if "*" in vals[0]: + unit = vals[0].split("*")[1] + rounding = ( + len(val.split("*")[0].split(".")[-1]) if "." in vals[0] else 16 + ) + vals = np.array([float(val.split("*")[0]) for val in vals]) + else: + unit = None + rounding = 16 + else: + vals = np.array(vals) + unit = None + rounding = 16 + + mean_val = np.nan if len(vals[~np.isnan(vals)]) == 0 else np.nanmedian(vals) + mean = f"{round(mean_val, rounding)}*{unit}" if unit is not None else mean_val + + for _tstamp, tstamp_dict in in_dicts.items(): + val = tstamp_dict + for i, key in enumerate(keys): + if i == len(keys) - 1: + val[key] = mean + else: + val = val[key] + + fig = plt.figure() + plt.scatter( + [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in in_dicts], vals + ) + plt.axhline(y=mean_val, color="r", linestyle="-") + plt.xlabel("time") + if unit is not None: + plt.ylabel(f"value {unit}") + else: + plt.ylabel("value") + plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y")) + plt.gcf().autofmt_xdate() + plt.title(field) + plot_dict[field] = fig + plt.close() + + for file in args.output: + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp + Props.write_to(file, in_dicts[tstamp]) + + if args.out_plots: + for file in args.out_plots: + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp + if args.in_plots: + for infile in args.in_plots: + if tstamp in infile: + with Path(infile).open("rb") as f: + old_plot_dict = pkl.load(f) + break + old_plot_dict.update({"psp": plot_dict}) + new_plot_dict = old_plot_dict + else: + new_plot_dict = {"psp": plot_dict} + with Path(file).open("wb") as f: + pkl.dump(new_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) + + if args.out_obj: + for file in args.out_obj: + tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp + if args.in_obj: + for infile in args.in_obj: + if tstamp in infile: + with Path(infile).open("rb") as f: + old_obj_dict = pkl.load(f) + break + new_obj_dict = old_obj_dict + else: + new_obj_dict = {} + with Path(file).open("wb") as f: + pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/par/geds/raw/blinding_calibration.py b/workflow/src/legenddataflow/scripts/par/geds/raw/blinding_calibration.py new file mode 100644 index 0000000..8af27a2 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/blinding_calibration.py @@ -0,0 +1,119 @@ +""" +This script applies a simple calibration to the daqenergy for all channels, +it does this using a peak search, matching the peaks to the given ones +and deriving a simple scaling relation from adc to keV. 
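+The derived scale factor is written out as a `daqenergy_cal` expression to be applied when building the hit tier.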
+""" + +import argparse +import logging +import pickle as pkl +from pathlib import Path + +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +from dbetto.catalog import Props +from lgdo import lh5 +from pygama.pargen.energy_cal import HPGeCalibration + +mpl.use("agg") + + +def par_geds_raw_blindcal() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--files", help="files", nargs="*", type=str) + + argparser.add_argument("--blind_curve", help="blind_curve", type=str) + argparser.add_argument("--plot_file", help="out plot path", type=str) + + argparser.add_argument("--meta", help="meta", type=str) + argparser.add_argument("--configs", help="configs", type=str) + argparser.add_argument("--log", help="log", type=str) + + argparser.add_argument("--timestamp", help="timestamp", type=str) + argparser.add_argument("--datatype", help="datatype", type=str) + argparser.add_argument("--channel", help="channel", type=str) + + argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") + args = argparser.parse_args() + + logging.basicConfig(level=logging.DEBUG, filename=args.log, filemode="w") + logging.getLogger("numba").setLevel(logging.INFO) + logging.getLogger("parse").setLevel(logging.INFO) + logging.getLogger("lgdo").setLevel(logging.INFO) + logging.getLogger("matplotlib").setLevel(logging.INFO) + log = logging.getLogger(__name__) + + # load in channel map + # meta = LegendMetadata(args.meta, lazy=True) + + # chmap = meta.channelmap(args.timestamp) + # if chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["is_blinded"] is True: + + # peaks to search for + peaks_keV = np.array( + [238, 583.191, 727.330, 860.564, 1592.53, 1620.50, 2103.53, 2614.50] + ) + + E_uncal = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[0].view_as( + "np" + ) + E_uncal = E_uncal[E_uncal > 200] + guess_keV = 2620 / np.nanpercentile(E_uncal, 99) # usual simple guess + # Euc_min = peaks_keV[0] / guess_keV * 0.6 + # Euc_max = peaks_keV[-1] / guess_keV * 1.1 + # dEuc = 1 / guess_keV + + # daqenergy is an int so use integer binning (dx used to be bugged as output so switched to nbins) + + hpge_cal = HPGeCalibration( + "daqenergy", + peaks_keV, + guess_keV, + 0, + uncal_is_int=True, + debug_mode=args.debug, + ) + + # Run the rough peak search + detected_peaks_locs, detected_peaks_keV, roughpars = hpge_cal.hpge_find_E_peaks( + E_uncal + ) + + log.info(f"{len(detected_peaks_locs)} peaks found:") + log.info("\t Energy | Position ") + for i, (Li, Ei) in enumerate(zip(detected_peaks_locs, detected_peaks_keV)): + log.info(f"\t{i}".ljust(4) + str(Ei).ljust(9) + f"| {Li:g}".ljust(5)) # noqa: G003 + + # dictionary to pass to build hit + out_dict = { + "pars": { + "operations": { + "daqenergy_cal": { + "expression": "daqenergy*a", + "parameters": {"a": round(roughpars[0], 5)}, + } + } + } + } + + # plot to check thagt the calibration is correct with zoom on 2.6 peak + fig = plt.figure(figsize=(8, 10)) + ax = plt.subplot(211) + ax.hist(E_uncal * roughpars[0], bins=np.arange(0, 3000, 1), histtype="step") + ax.set_ylabel("counts") + ax.set_yscale("log") + ax2 = plt.subplot(212) + ax2.hist( + E_uncal * roughpars[0], + bins=np.arange(2600, 2630, 1 * roughpars[0]), + histtype="step", + ) + ax2.set_xlabel("energy (keV)") + ax2.set_ylabel("counts") + plt.suptitle(args.channel) + with Path(args.plot_file).open("wb") as w: + pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) + plt.close() + + Props.write_to_file(args.blind_curve, out_dict) diff --git 
a/workflow/src/legenddataflow/scripts/par/geds/raw/check_blinding.py b/workflow/src/legenddataflow/scripts/par/geds/raw/check_blinding.py new file mode 100644 index 0000000..4a8f53c --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/check_blinding.py @@ -0,0 +1,114 @@ +""" +This script checks that the blinding for a particular channel is still valid, +it does this by taking the calibration curve stored in the overrides, applying it +to the daqenergy, running a peak search over the calibrated energy and checking that +there are peaks within 5keV of the 583 and 2614 peaks. If the detector is in ac mode +then it will skip the check. +""" + +import argparse +import pickle as pkl +from pathlib import Path + +import matplotlib as mpl +import matplotlib.pyplot as plt +import numexpr as ne +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from lgdo import lh5 +from pygama.math.histogram import get_hist +from pygama.pargen.energy_cal import get_i_local_maxima + +from ....log import build_log + +mpl.use("Agg") + + +def par_geds_raw_blindcheck() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--files", help="files", nargs="*", type=str) + argparser.add_argument("--output", help="output file", type=str) + argparser.add_argument("--plot_file", help="plot file", type=str) + argparser.add_argument( + "--blind_curve", help="blinding curves file", nargs="*", type=str + ) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--configs", help="config file", type=str) + argparser.add_argument("--channel", help="channel", type=str) + argparser.add_argument("--metadata", help="channel", type=str) + argparser.add_argument("--log", help="log file", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["tier_raw_blindcheck"] + + log = build_log(config_dict, args.log) + + # get the usability status for this channel + chmap = ( + LegendMetadata(args.metadata, lazy=True) + .channelmap(args.timestamp) + .map("daq.rawid") + ) + det_status = chmap[int(args.channel[2:])]["analysis"]["is_blinded"] + + # read in calibration curve for this channel + blind_curve = Props.read_from(args.blind_curve)[args.channel]["pars"]["operations"] + + # load in the data + daqenergy = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[ + 0 + ].view_as("np") + + # calibrate daq energy using pre existing curve + daqenergy_cal = ne.evaluate( + blind_curve["daqenergy_cal"]["expression"], + local_dict=dict( + daqenergy=daqenergy, **blind_curve["daqenergy_cal"]["parameters"] + ), + ) + + # bin with 1 keV bins and get maxs + hist, bins, var = get_hist(daqenergy_cal, np.arange(0, 3000, 1)) + maxs = get_i_local_maxima(hist, delta=25) + log.info(f"peaks found at : {maxs}") + + # plot the energy spectrum to check calibration + fig = plt.figure(figsize=(8, 10)) + ax = plt.subplot(211) + ax.hist(daqenergy_cal, bins=np.arange(0, 3000, 1), histtype="step") + ax.set_ylabel("counts") + ax.set_yscale("log") + ax2 = plt.subplot(212) + ax2.hist( + daqenergy_cal, + bins=np.arange(2600, 2630, 1 * blind_curve["daqenergy_cal"]["parameters"]["a"]), + histtype="step", + ) + ax2.set_xlabel("energy (keV)") + ax2.set_ylabel("counts") + plt.suptitle(args.channel) + with 
Path(args.plot_file).open("wb") as w: + pkl.dump(fig, w, protocol=pkl.HIGHEST_PROTOCOL) + plt.close() + + # check for peaks within +- 5keV of 2614 and 583 to ensure blinding still + # valid and if so create file else raise error. if detector is in ac mode it + # will always pass this check + if ( + np.any(np.abs(maxs - 2614) < 5) and np.any(np.abs(maxs - 583) < 5) + ) or det_status is False: + Path(args.output).mkdir(parents=True, exist_ok=True) + Props.write_to( + args.output, + { + "threshold_adc": np.nanmin(daqenergy), + "threshold_kev": np.nanmin(daqenergy_cal), + }, + ) + else: + msg = "peaks not found in daqenergy" + raise RuntimeError(msg) diff --git a/workflow/src/legenddataflow/scripts/par/geds/tcm/pars_tcm_pulser.py b/workflow/src/legenddataflow/scripts/par/geds/tcm/pars_tcm_pulser.py new file mode 100644 index 0000000..ab5f400 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/par/geds/tcm/pars_tcm_pulser.py @@ -0,0 +1,58 @@ +import argparse +from pathlib import Path + +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from legendmeta import LegendMetadata +from pygama.pargen.data_cleaning import get_tcm_pulser_ids + +from ....log import build_log + + +def par_geds_tcm_pulser() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--configs", help="configs path", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--channel", help="Channel", type=str, required=True) + + argparser.add_argument( + "--pulser_file", help="pulser file", type=str, required=False + ) + + argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["pars_tcm_pulser"] + + build_log(config_dict, args.log) + + kwarg_dict = config_dict["inputs"]["pulser_config"] + kwarg_dict = Props.read_from(kwarg_dict) + + meta = LegendMetadata(path=args.metadata) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid}" + + if ( + isinstance(args.tcm_files, list) + and args.tcm_files[0].split(".")[-1] == "filelist" + ): + tcm_files = args.tcm_files[0] + with Path(tcm_files).open() as f: + tcm_files = f.read().splitlines() + else: + tcm_files = args.tcm_files + # get pulser mask from tcm files + tcm_files = sorted(np.unique(tcm_files)) + ids, mask = get_tcm_pulser_ids( + tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") + ) + + Path(args.pulser_file).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.pulser_file, {"idxs": ids.tolist(), "mask": mask.tolist()}) diff --git a/workflow/src/legenddataflow/scripts/par_psp_geds.py b/workflow/src/legenddataflow/scripts/par_psp_geds.py deleted file mode 100644 index e65903c..0000000 --- a/workflow/src/legenddataflow/scripts/par_psp_geds.py +++ /dev/null @@ -1,157 +0,0 @@ -import argparse -import pickle as pkl -from datetime import datetime -from pathlib import Path - -import matplotlib as mpl -import matplotlib.dates as mdates -import matplotlib.pyplot as plt -import numpy as np -from dbetto.catalog import Props -from legendmeta import LegendMetadata - 
-from ..FileKey import ChannelProcKey - -mpl.use("Agg") - - -argparser = argparse.ArgumentParser() -argparser.add_argument( - "--input", help="input files", nargs="*", type=str, required=True -) -argparser.add_argument( - "--output", help="output file", nargs="*", type=str, required=True -) -argparser.add_argument( - "--in_plots", help="input plot files", nargs="*", type=str, required=False -) -argparser.add_argument( - "--out_plots", help="output plot files", nargs="*", type=str, required=False -) -argparser.add_argument( - "--in_obj", help="input object files", nargs="*", type=str, required=False -) -argparser.add_argument( - "--out_obj", help="output object files", nargs="*", type=str, required=False -) - -argparser.add_argument("--log", help="log_file", type=str) -argparser.add_argument("--configs", help="configs", type=str, required=True) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) -args = argparser.parse_args() - -configs = LegendMetadata(args.configs, lazy=True).on( - args.timestamp, system=args.datatype -) -merge_config = Props.read_from( - configs["snakemake_rules"]["pars_psp"]["inputs"]["psp_config"][args.channel] -) - -ave_fields = merge_config["average_fields"] - -# partitions could be different for different channels - do separately for each channel -in_dicts = {} -for file in args.input: - tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp - in_dicts[tstamp] = Props.read_from(file) - -plot_dict = {} -for field in ave_fields: - keys = field.split(".") - vals = [] - for _tstamp, tstamp_dict in in_dicts.items(): - val = tstamp_dict.copy() - for key in keys: - val = val[key] - vals.append(val) - if "dsp" in tstamp_dict: - tmp_dict = tstamp_dict["dsp"] - else: - tmp_dict = {} - tstamp_dict["dsp"] = tmp_dict - for i, key in enumerate(keys): - if i == len(keys) - 1: - tmp_dict[key] = val - else: - if key in tmp_dict: - tmp_dict = tmp_dict[key] - else: - tmp_dict[key] = {} - tmp_dict = tmp_dict[key] - if isinstance(vals[0], str): - if "*" in vals[0]: - unit = vals[0].split("*")[1] - rounding = len(val.split("*")[0].split(".")[-1]) if "." 
in vals[0] else 16 - vals = np.array([float(val.split("*")[0]) for val in vals]) - else: - unit = None - rounding = 16 - else: - vals = np.array(vals) - unit = None - rounding = 16 - - mean_val = np.nan if len(vals[~np.isnan(vals)]) == 0 else np.nanmedian(vals) - mean = f"{round(mean_val, rounding)}*{unit}" if unit is not None else mean_val - - for _tstamp, tstamp_dict in in_dicts.items(): - val = tstamp_dict - for i, key in enumerate(keys): - if i == len(keys) - 1: - val[key] = mean - else: - val = val[key] - - fig = plt.figure() - plt.scatter( - [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in in_dicts], vals - ) - plt.axhline(y=mean_val, color="r", linestyle="-") - plt.xlabel("time") - if unit is not None: - plt.ylabel(f"value {unit}") - else: - plt.ylabel("value") - plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y")) - plt.gcf().autofmt_xdate() - plt.title(field) - plot_dict[field] = fig - plt.close() - -for file in args.output: - tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp - Props.write_to(file, in_dicts[tstamp]) - -if args.out_plots: - for file in args.out_plots: - tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp - if args.in_plots: - for infile in args.in_plots: - if tstamp in infile: - with Path(infile).open("rb") as f: - old_plot_dict = pkl.load(f) - break - old_plot_dict.update({"psp": plot_dict}) - new_plot_dict = old_plot_dict - else: - new_plot_dict = {"psp": plot_dict} - with Path(file).open("wb") as f: - pkl.dump(new_plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) - -if args.out_obj: - for file in args.out_obj: - tstamp = ChannelProcKey.get_filekey_from_pattern(Path(file).name).timestamp - if args.in_obj: - for infile in args.in_obj: - if tstamp in infile: - with Path(infile).open("rb") as f: - old_obj_dict = pkl.load(f) - break - new_obj_dict = old_obj_dict - else: - new_obj_dict = {} - with Path(file).open("wb") as f: - pkl.dump(new_obj_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py deleted file mode 100644 index a5310e9..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_build_svm_geds.py +++ /dev/null @@ -1,57 +0,0 @@ -import argparse -import pickle as pkl -from pathlib import Path - -from dbetto import TextDB -from dbetto.catalog import Props -from lgdo import lh5 -from sklearn.svm import SVC - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--log", help="log file", type=str) -argparser.add_argument("--configs", help="config file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) - -argparser.add_argument("--output_file", help="output SVM file", type=str, required=True) -argparser.add_argument("--train_data", help="input data file", type=str, required=True) -argparser.add_argument( - "--train_hyperpars", help="input hyperparameter file", required=True -) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_build_svm"] - -log = build_log(config_dict, args.log) - -# Load files -tb = lh5.read("ml_train/dsp", args.train_data) -log.debug("loaded data") - -hyperpars = Props.read_from(args.train_hyperpars) - -# Define training inputs -dwts_norm = tb["dwt_norm"].nda -labels 
= tb["dc_label"].nda - -log.debug("training model") -# Initialize and train SVM -svm = SVC( - random_state=int(hyperpars["random_state"]), - kernel=hyperpars["kernel"], - decision_function_shape=hyperpars["decision_function_shape"], - class_weight=hyperpars["class_weight"], - C=float(hyperpars["C"]), - gamma=float(hyperpars["gamma"]), -) - -svm.fit(dwts_norm, labels) -log.debug("trained model") - -# Save trained model with pickle -with Path(args.output_file).open("wb") as svm_file: - pkl.dump(svm, svm_file, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py deleted file mode 100644 index a47b653..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_dplms_geds.py +++ /dev/null @@ -1,148 +0,0 @@ -import argparse -import logging -import pickle as pkl -import time -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from lgdo import Array, Table -from pygama.pargen.dplms_ge_dict import dplms_ge_dict - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) -argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) -argparser.add_argument("--inplots", help="in_plot_path", type=str) -argparser.add_argument("--database", help="database", type=str, required=True) - -argparser.add_argument("--log", help="log_file", type=str) -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) -argparser.add_argument("--lh5_path", help="lh5_path", type=str, required=True) -argparser.add_argument("--plot_path", help="plot_path", type=str) - -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_dplms"] - -log = build_log(config_dict, args.log) - -log = logging.getLogger(__name__) -sto = lh5.LH5Store() - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - -configs = LegendMetadata(args.configs, lazy=True).on( - args.timestamp, system=args.datatype -) -dsp_config = config_dict["inputs"]["proc_chain"][args.channel] - -dplms_json = config_dict["inputs"]["dplms_pars"][args.channel] -dplms_dict = Props.read_from(dplms_json) - -db_dict = Props.read_from(args.database) - -if dplms_dict["run_dplms"] is True: - with Path(args.fft_raw_filelist).open() as f: - fft_files = sorted(f.read().splitlines()) - - t0 = time.time() - log.info("\nLoad fft data") - energies = sto.read(f"{channel}/raw/daqenergy", fft_files)[0] - idxs = np.where(energies.nda == 0)[0] - raw_fft = sto.read( - f"{channel}/raw", fft_files, n_rows=dplms_dict["n_baselines"], idx=idxs - )[0] - t1 = time.time() - log.info(f"Time to load fft data {(t1-t0):.2f} s, total events {len(raw_fft)}") - - log.info("\nRunning event selection") - peaks_kev = 
np.array(dplms_dict["peaks_kev"]) - kev_widths = [tuple(kev_width) for kev_width in dplms_dict["kev_widths"]] - - peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0][ - "peak" - ].nda - ids = np.isin(peaks, peaks_rounded) - peaks = peaks[ids] - idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - - raw_cal = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] - log.info( - f"Time to run event selection {(time.time()-t1):.2f} s, total events {len(raw_cal)}" - ) - - if isinstance(dsp_config, (str, list)): - dsp_config = Props.read_from(dsp_config) - - if args.plot_path: - out_dict, plot_dict = dplms_ge_dict( - raw_fft, - raw_cal, - dsp_config, - db_dict, - dplms_dict, - display=1, - ) - if args.inplots: - with Path(args.inplots).open("rb") as r: - inplot_dict = pkl.load(r) - inplot_dict.update({"dplms": plot_dict}) - - else: - out_dict = dplms_ge_dict( - raw_fft, - raw_cal, - dsp_config, - db_dict, - dplms_dict, - ) - - coeffs = out_dict["dplms"].pop("coefficients") - dplms_pars = Table(col_dict={"coefficients": Array(coeffs)}) - out_dict["dplms"]["coefficients"] = ( - f"loadlh5('{args.lh5_path}', '{channel}/dplms/coefficients')" - ) - - log.info(f"DPLMS creation finished in {(time.time()-t0)/60} minutes") -else: - out_dict = {} - dplms_pars = Table(col_dict={"coefficients": Array([])}) - if args.inplots: - with Path(args.inplots).open("rb") as r: - inplot_dict = pkl.load(r) - else: - inplot_dict = {} - -db_dict.update(out_dict) - -Path(args.lh5_path).parent.mkdir(parents=True, exist_ok=True) -sto.write( - Table(col_dict={"dplms": dplms_pars}), - name=channel, - lh5_file=args.lh5_path, - wo_mode="overwrite", -) - -Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.dsp_pars, db_dict) - -if args.plot_path: - Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) - with Path(args.plot_path).open("wb") as f: - pkl.dump(inplot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py deleted file mode 100644 index c059961..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_eopt_geds.py +++ /dev/null @@ -1,395 +0,0 @@ -import argparse -import pickle as pkl -import time -import warnings -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -import pygama.pargen.energy_optimisation as om # noqa: F401 -import sklearn.gaussian_process.kernels as ker -from dbetto import TextDB -from dbetto.catalog import Props -from dspeed.units import unit_registry as ureg -from legendmeta import LegendMetadata -from pygama.math.distributions import hpge_peak -from pygama.pargen.dsp_optimize import ( - BayesianOptimizer, - run_bayesian_optimisation, - run_one_dsp, -) - -from ..log import build_log - -warnings.filterwarnings(action="ignore", category=RuntimeWarning) -warnings.filterwarnings(action="ignore", category=np.RankWarning) - - -argparser = argparse.ArgumentParser() - -argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) -argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) -argparser.add_argument("--inplots", help="in_plot_path", type=str) - -argparser.add_argument("--log", help="log_file", type=str) -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) - 
-argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument( - "--final_dsp_pars", help="final_dsp_pars", type=str, required=True -) -argparser.add_argument("--qbb_grid_path", help="qbb_grid_path", type=str) -argparser.add_argument("--plot_path", help="plot_path", type=str) - -argparser.add_argument( - "--plot_save_path", help="plot_save_path", type=str, required=False -) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_eopt"] - -log = build_log(config_dict, args.log) - -sto = lh5.LH5Store() -t0 = time.time() - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - -dsp_config = config_dict["inputs"]["processing_chain"][args.channel] -opt_json = config_dict["inputs"]["optimiser_config"][args.channel] - -opt_dict = Props.read_from(opt_json) -db_dict = Props.read_from(args.decay_const) - -if opt_dict.pop("run_eopt") is True: - peaks_kev = np.array(opt_dict["peaks"]) - kev_widths = [tuple(kev_width) for kev_width in opt_dict["kev_widths"]] - - kwarg_dicts_cusp = [] - kwarg_dicts_trap = [] - kwarg_dicts_zac = [] - for peak in peaks_kev: - peak_idx = np.where(peaks_kev == peak)[0][0] - kev_width = kev_widths[peak_idx] - - kwarg_dicts_cusp.append( - { - "parameter": "cuspEmax", - "func": hpge_peak, - "peak": peak, - "kev_width": kev_width, - "bin_width": 5, - } - ) - kwarg_dicts_zac.append( - { - "parameter": "zacEmax", - "func": hpge_peak, - "peak": peak, - "kev_width": kev_width, - "bin_width": 5, - } - ) - kwarg_dicts_trap.append( - { - "parameter": "trapEmax", - "func": hpge_peak, - "peak": peak, - "kev_width": kev_width, - "bin_width": 5, - } - ) - - peaks_rounded = [int(peak) for peak in peaks_kev] - peaks = sto.read(f"{channel}/raw", args.peak_file, field_mask=["peak"])[0][ - "peak" - ].nda - ids = np.isin(peaks, peaks_rounded) - peaks = peaks[ids] - idx_list = [np.where(peaks == peak)[0] for peak in peaks_rounded] - - tb_data = sto.read(f"{channel}/raw", args.peak_file, idx=ids)[0] - - t1 = time.time() - log.info(f"Data Loaded in {(t1-t0)/60} minutes") - - if isinstance(dsp_config, (str, list)): - dsp_config = Props.read_from(dsp_config) - - dsp_config["outputs"] = ["tp_99", "tp_0_est", "dt_eff"] - - init_data = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0) - full_dt = (init_data["tp_99"].nda - init_data["tp_0_est"].nda)[idx_list[-1]] - flat_val = np.ceil(1.1 * np.nanpercentile(full_dt, 99) / 100) / 10 - - if flat_val < 1.0: - flat_val = 1.0 - elif flat_val > 4: - flat_val = 4 - flat_val = f"{flat_val}*us" - - db_dict["cusp"] = {"flat": flat_val} - db_dict["zac"] = {"flat": flat_val} - db_dict["etrap"] = {"flat": flat_val} - - tb_data.add_column("dt_eff", init_data["dt_eff"]) - - dsp_config["processors"].pop("dt_eff") - - dsp_config["outputs"] = ["zacEmax", "cuspEmax", "trapEmax", "dt_eff"] - - kwarg_dict = [ - { - "peak_dicts": kwarg_dicts_cusp, - "ctc_param": "dt_eff", - "idx_list": idx_list, - "peaks_kev": peaks_kev, - }, - { - "peak_dicts": kwarg_dicts_zac, - "ctc_param": "dt_eff", - "idx_list": idx_list, - "peaks_kev": peaks_kev, - }, - { - "peak_dicts": kwarg_dicts_trap, - "ctc_param": "dt_eff", - "idx_list": idx_list, - 
"peaks_kev": peaks_kev, - }, - ] - - fom = eval(opt_dict["fom"]) - out_field = opt_dict["fom_field"] - out_err_field = opt_dict["fom_err_field"] - sample_x = np.array(opt_dict["initial_samples"]) - - results_cusp = [] - results_zac = [] - results_trap = [] - - sample_y_cusp = [] - sample_y_zac = [] - sample_y_trap = [] - - err_y_cusp = [] - err_y_zac = [] - err_y_trap = [] - - for i, x in enumerate(sample_x): - db_dict["cusp"]["sigma"] = f"{x[0]}*us" - db_dict["zac"]["sigma"] = f"{x[0]}*us" - db_dict["etrap"]["rise"] = f"{x[0]}*us" - - log.info(f"Initialising values {i+1} : {db_dict}") - - tb_out = run_one_dsp(tb_data, dsp_config, db_dict=db_dict, verbosity=0) - - res = fom(tb_out, kwarg_dict[0]) - results_cusp.append(res) - sample_y_cusp.append(res[out_field]) - err_y_cusp.append(res[out_err_field]) - - res = fom(tb_out, kwarg_dict[1]) - results_zac.append(res) - sample_y_zac.append(res[out_field]) - err_y_zac.append(res[out_err_field]) - - res = fom(tb_out, kwarg_dict[2]) - results_trap.append(res) - sample_y_trap.append(res[out_field]) - err_y_trap.append(res[out_err_field]) - - log.info(f"{i+1} Finished") - - if np.isnan(sample_y_cusp).all(): - max_cusp = opt_dict["nan_default"] - else: - max_cusp = np.ceil(np.nanmax(sample_y_cusp) * 2) - if np.isnan(sample_y_zac).all(): - max_zac = opt_dict["nan_default"] - else: - max_zac = np.ceil(np.nanmax(sample_y_zac) * 2) - if np.isnan(sample_y_trap).all(): - max_trap = opt_dict["nan_default"] - else: - max_trap = np.ceil(np.nanmax(sample_y_trap) * 2) - - nan_vals = [max_cusp, max_zac, max_trap] - - for i in range(len(sample_x)): - if np.isnan(sample_y_cusp[i]): - results_cusp[i]["y_val"] = max_cusp - sample_y_cusp[i] = max_cusp - - if np.isnan(sample_y_zac[i]): - results_zac[i]["y_val"] = max_zac - sample_y_zac[i] = max_zac - - if np.isnan(sample_y_trap[i]): - results_trap[i]["y_val"] = max_trap - sample_y_trap[i] = max_trap - - kernel = ( - ker.ConstantKernel(2.0, constant_value_bounds="fixed") - + 1.0 * ker.RBF(1.0, length_scale_bounds=[0.5, 2.5]) - + ker.WhiteKernel(noise_level=0.1, noise_level_bounds=(1e-5, 1e1)) - ) - - lambda_param = 5 - sampling_rate = tb_data["waveform_presummed"]["dt"][0] - sampling_unit = ureg.Quantity(tb_data["waveform_presummed"]["dt"].attrs["units"]) - waveform_sampling = sampling_rate * sampling_unit - - bopt_cusp = BayesianOptimizer( - acq_func=opt_dict["acq_func"], - batch_size=opt_dict["batch_size"], - kernel=kernel, - sampling_rate=waveform_sampling, - fom_value=out_field, - fom_error=out_err_field, - ) - bopt_cusp.lambda_param = lambda_param - bopt_cusp.add_dimension("cusp", "sigma", 0.5, 16, True, "us") - - bopt_zac = BayesianOptimizer( - acq_func=opt_dict["acq_func"], - batch_size=opt_dict["batch_size"], - kernel=kernel, - sampling_rate=waveform_sampling, - fom_value=out_field, - fom_error=out_err_field, - ) - bopt_zac.lambda_param = lambda_param - bopt_zac.add_dimension("zac", "sigma", 0.5, 16, True, "us") - - bopt_trap = BayesianOptimizer( - acq_func=opt_dict["acq_func"], - batch_size=opt_dict["batch_size"], - kernel=kernel, - sampling_rate=waveform_sampling, - fom_value=out_field, - fom_error=out_err_field, - ) - bopt_trap.lambda_param = lambda_param - bopt_trap.add_dimension("etrap", "rise", 1, 12, True, "us") - - bopt_cusp.add_initial_values( - x_init=sample_x, y_init=sample_y_cusp, yerr_init=err_y_cusp - ) - bopt_zac.add_initial_values( - x_init=sample_x, y_init=sample_y_zac, yerr_init=err_y_zac - ) - bopt_trap.add_initial_values( - x_init=sample_x, y_init=sample_y_trap, yerr_init=err_y_trap - ) - - 
best_idx = np.nanargmin(sample_y_cusp) - bopt_cusp.optimal_results = results_cusp[best_idx] - bopt_cusp.optimal_x = sample_x[best_idx] - - best_idx = np.nanargmin(sample_y_zac) - bopt_zac.optimal_results = results_zac[best_idx] - bopt_zac.optimal_x = sample_x[best_idx] - - best_idx = np.nanargmin(sample_y_trap) - bopt_trap.optimal_results = results_trap[best_idx] - bopt_trap.optimal_x = sample_x[best_idx] - - optimisers = [bopt_cusp, bopt_zac, bopt_trap] - - out_param_dict, out_results_list = run_bayesian_optimisation( - tb_data, - dsp_config, - [fom], - optimisers, - fom_kwargs=kwarg_dict, - db_dict=db_dict, - nan_val=nan_vals, - n_iter=opt_dict["n_iter"], - ) - - Props.add_to(db_dict, out_param_dict) - - # db_dict.update(out_param_dict) - - t2 = time.time() - log.info(f"Optimiser finished in {(t2-t1)/60} minutes") - - out_alpha_dict = {} - out_alpha_dict["cuspEmax_ctc"] = { - "expression": "cuspEmax*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, - } - - out_alpha_dict["cuspEftp_ctc"] = { - "expression": "cuspEftp*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_cusp.optimal_results["alpha"], 9))}, - } - - out_alpha_dict["zacEmax_ctc"] = { - "expression": "zacEmax*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, - } - - out_alpha_dict["zacEftp_ctc"] = { - "expression": "zacEftp*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_zac.optimal_results["alpha"], 9))}, - } - - out_alpha_dict["trapEmax_ctc"] = { - "expression": "trapEmax*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, - } - - out_alpha_dict["trapEftp_ctc"] = { - "expression": "trapEftp*(1+dt_eff*a)", - "parameters": {"a": float(round(bopt_trap.optimal_results["alpha"], 9))}, - } - if "ctc_params" in db_dict: - db_dict["ctc_params"].update(out_alpha_dict) - else: - db_dict.update({"ctc_params": out_alpha_dict}) - - Path(args.qbb_grid_path).parent.mkdir(parents=True, exist_ok=True) - with Path(args.qbb_grid_path).open("wb") as f: - pkl.dump(optimisers, f) - -else: - Path(args.qbb_grid_path).touch() - -Path(args.final_dsp_pars).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.final_dsp_pars, db_dict) - -if args.plot_path: - if args.inplots: - with Path(args.inplots).open("rb") as r: - plot_dict = pkl.load(r) - else: - plot_dict = {} - - plot_dict["trap_optimisation"] = { - "kernel_space": bopt_trap.plot(init_samples=sample_x), - "acq_space": bopt_trap.plot_acq(init_samples=sample_x), - } - - plot_dict["cusp_optimisation"] = { - "kernel_space": bopt_cusp.plot(init_samples=sample_x), - "acq_space": bopt_cusp.plot_acq(init_samples=sample_x), - } - - plot_dict["zac_optimisation"] = { - "kernel_space": bopt_zac.plot(init_samples=sample_x), - "acq_space": bopt_zac.plot_acq(init_samples=sample_x), - } - - Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) - with Path(args.plot_path).open("wb") as w: - pkl.dump(plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py deleted file mode 100644 index 7e843e8..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_nopt_geds.py +++ /dev/null @@ -1,108 +0,0 @@ -import argparse -import pickle as pkl -import time -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -import pygama.pargen.noise_optimization as pno -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import 
LegendMetadata -from pygama.pargen.data_cleaning import generate_cuts, get_cut_indexes -from pygama.pargen.dsp_optimize import run_one_dsp - -from ..log import build_log - -sto = lh5.LH5Store() - -argparser = argparse.ArgumentParser() -argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) -argparser.add_argument("--database", help="database", type=str, required=True) -argparser.add_argument("--inplots", help="inplots", type=str) - -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) -argparser.add_argument("--plot_path", help="plot_path", type=str) - -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] - -log = build_log(config_dict, args.log) - - -t0 = time.time() - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - -dsp_config = config_dict["inputs"]["processing_chain"][args.channel] -opt_json = config_dict["inputs"]["optimiser_config"][args.channel] - -opt_dict = Props.read_from(opt_json) -db_dict = Props.read_from(args.database) - -if opt_dict.pop("run_nopt") is True: - with Path(args.raw_filelist).open() as f: - files = f.read().splitlines() - - raw_files = sorted(files) - - energies = sto.read(f"{channel}/raw/daqenergy", raw_files)[0] - idxs = np.where(energies.nda == 0)[0] - tb_data = sto.read( - f"{channel}/raw", raw_files, n_rows=opt_dict["n_events"], idx=idxs - )[0] - t1 = time.time() - log.info(f"Time to open raw files {t1-t0:.2f} s, n. baselines {len(tb_data)}") - - log.info(f"Select baselines {len(tb_data)}") - dsp_data = run_one_dsp(tb_data, dsp_config) - cut_dict = generate_cuts(dsp_data, cut_dict=opt_dict.pop("cut_pars")) - cut_idxs = get_cut_indexes(dsp_data, cut_dict) - tb_data = sto.read( - f"{channel}/raw", raw_files, n_rows=opt_dict.pop("n_events"), idx=idxs[cut_idxs] - )[0] - log.info(f"... 
{len(tb_data)} baselines after cuts") - - if isinstance(dsp_config, (str, list)): - dsp_config = Props.read_from(dsp_config) - - if args.plot_path: - out_dict, plot_dict = pno.noise_optimization( - tb_data, dsp_config, db_dict.copy(), opt_dict, channel, display=1 - ) - else: - out_dict = pno.noise_optimization( - raw_files, dsp_config, db_dict.copy(), opt_dict, channel - ) - - t2 = time.time() - log.info(f"Optimiser finished in {(t2-t0)/60} minutes") -else: - out_dict = {} - plot_dict = {} - -if args.plot_path: - Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) - if args.inplots: - with Path(args.inplots).open("rb") as r: - old_plot_dict = pkl.load(r) - plot_dict = dict(noise_optimisation=plot_dict, **old_plot_dict) - else: - plot_dict = {"noise_optimisation": plot_dict} - with Path(args.plot_path).open("wb") as f: - pkl.dump(plot_dict, f, protocol=pkl.HIGHEST_PROTOCOL) - -Path(args.dsp_pars).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.dsp_pars, dict(nopt_pars=out_dict, **db_dict)) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_svm_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_svm_geds.py deleted file mode 100644 index 67d8a64..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_svm_geds.py +++ /dev/null @@ -1,20 +0,0 @@ -import argparse -from pathlib import Path - -from dbetto.catalog import Props - -argparser = argparse.ArgumentParser() -argparser.add_argument("--log", help="log file", type=str) -argparser.add_argument("--output_file", help="output par file", type=str, required=True) -argparser.add_argument("--input_file", help="input par file", type=str, required=True) -argparser.add_argument("--svm_file", help="svm file", required=True) -args = argparser.parse_args() - -par_data = Props.read_from(args.input_file) - -file = f"'$_/{Path(args.svm_file).name}'" - -par_data["svm"] = {"model_file": file} - -Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.output_file, par_data) diff --git a/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py b/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py deleted file mode 100644 index 1ca084b..0000000 --- a/workflow/src/legenddataflow/scripts/pars_dsp_tau_geds.py +++ /dev/null @@ -1,139 +0,0 @@ -import argparse -import pickle as pkl -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids -from pygama.pargen.dsp_optimize import run_one_dsp -from pygama.pargen.extract_tau import ExtractTau - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs path", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--plot_path", help="plot path", type=str, required=False) -argparser.add_argument("--output_file", help="output file", type=str, required=True) - -argparser.add_argument("--pulser_file", help="pulser file", type=str, required=False) - -argparser.add_argument("--raw_files", help="input files", nargs="*", type=str) 
-argparser.add_argument( - "--tcm_files", help="tcm_files", nargs="*", type=str, required=False -) -args = argparser.parse_args() - -sto = lh5.LH5Store() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_dsp_nopt"] - -log = build_log(config_dict, args.log) - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - -channel_dict = config_dict["inputs"]["processing_chain"][args.channel] -kwarg_dict = config_dict["inputs"]["tau_config"][args.channel] - -kwarg_dict = Props.read_from(kwarg_dict) - -if kwarg_dict["run_tau"] is True: - dsp_config = Props.read_from(channel_dict) - kwarg_dict.pop("run_tau") - if ( - isinstance(args.raw_files, list) - and args.raw_files[0].split(".")[-1] == "filelist" - ): - input_file = args.raw_files[0] - with Path(input_file).open() as f: - input_file = f.read().splitlines() - else: - input_file = args.raw_files - - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) - - data = sto.read( - f"{channel}/raw", input_file, field_mask=["daqenergy", "timestamp", "t_sat_lo"] - )[0].view_as("pd") - threshold = kwarg_dict.pop("threshold") - - discharges = data["t_sat_lo"] > 0 - discharge_timestamps = np.where(data["timestamp"][discharges])[0] - is_recovering = np.full(len(data), False, dtype=bool) - for tstamp in discharge_timestamps: - is_recovering = is_recovering | np.where( - ( - ((data["timestamp"] - tstamp) < 0.01) - & ((data["timestamp"] - tstamp) > 0) - ), - True, - False, - ) - cuts = np.where( - (data.daqenergy.to_numpy() > threshold) & (~mask) & (~is_recovering) - )[0] - - tb_data = sto.read( - f"{channel}/raw", - input_file, - idx=cuts, - n_rows=kwarg_dict.pop("n_events"), - )[0] - - tb_out = run_one_dsp(tb_data, dsp_config) - log.debug("Processed Data") - cut_parameters = kwarg_dict.get("cut_parameters", None) - if cut_parameters is not None: - idxs = get_cut_indexes(tb_out, cut_parameters=cut_parameters) - log.debug("Applied cuts") - log.debug(f"{len(idxs)} events passed cuts") - else: - idxs = np.full(len(tb_out), True, dtype=bool) - - tau = ExtractTau(dsp_config, kwarg_dict["wf_field"]) - slopes = tb_out["tail_slope"].nda - log.debug("Calculating pz constant") - - tau.get_decay_constant(slopes[idxs], tb_data[kwarg_dict["wf_field"]]) - - if args.plot_path: - Path(args.plot_path).parent.mkdir(parents=True, exist_ok=True) - - plot_dict = tau.plot_waveforms_after_correction( - tb_data, "wf_pz", norm_param=kwarg_dict.get("norm_param", "pz_mean") - ) - plot_dict.update(tau.plot_slopes(slopes[idxs])) - - with Path(args.plot_path).open("wb") as f: - pkl.dump({"tau": plot_dict}, f, protocol=pkl.HIGHEST_PROTOCOL) -else: - out_dict = {} - -Path(args.output_file).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.output_file, tau.output_dict) diff --git a/workflow/src/legenddataflow/scripts/pars_hit_aoe.py b/workflow/src/legenddataflow/scripts/pars_hit_aoe.py deleted file mode 100644 index 7e13ed8..0000000 --- 
a/workflow/src/legenddataflow/scripts/pars_hit_aoe.py +++ /dev/null @@ -1,290 +0,0 @@ -from __future__ import annotations - -import argparse -import pickle as pkl -import warnings -from pathlib import Path -from typing import Callable - -import numpy as np -import pandas as pd -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from pygama.pargen.AoE_cal import * # noqa: F403 -from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak -from pygama.pargen.data_cleaning import get_tcm_pulser_ids -from pygama.pargen.utils import load_data - -from ..convert_np import convert_dict_np_to_float -from ..log import build_log - -warnings.filterwarnings(action="ignore", category=RuntimeWarning) - - -def get_results_dict(aoe_class): - return { - "cal_energy_param": aoe_class.cal_energy_param, - "dt_param": aoe_class.dt_param, - "rt_correction": aoe_class.dt_corr, - "1000-1300keV": aoe_class.timecorr_df.to_dict("index"), - "correction_fit_results": aoe_class.energy_corr_res_dict, - "low_cut": aoe_class.low_cut_val, - "high_cut": aoe_class.high_cut_val, - "low_side_sfs": aoe_class.low_side_sfs.to_dict("index"), - "2_side_sfs": aoe_class.two_side_sfs.to_dict("index"), - } - - -def fill_plot_dict(aoe_class, data, plot_options, plot_dict=None): - if plot_dict is not None: - for key, item in plot_options.items(): - if item["options"] is not None: - plot_dict[key] = item["function"](aoe_class, data, **item["options"]) - else: - plot_dict[key] = item["function"](aoe_class, data) - else: - plot_dict = {} - return plot_dict - - -def aoe_calibration( - data: pd.Dataframe, - cal_dicts: dict, - current_param: str, - energy_param: str, - cal_energy_param: str, - eres_func: Callable, - pdf: Callable = aoe_peak, - selection_string: str = "", - dt_corr: bool = False, - dep_correct: bool = False, - dt_cut: dict | None = None, - high_cut_val: int = 3, - mean_func: Callable = Pol1, - sigma_func: Callable = SigmaFit, - # dep_acc: float = 0.9, - dt_param: str = "dt_eff", - comptBands_width: int = 20, - plot_options: dict | None = None, - debug_mode: bool = False, -): - data["AoE_Uncorr"] = data[current_param] / data[energy_param] - aoe = CalAoE( - cal_dicts=cal_dicts, - cal_energy_param=cal_energy_param, - eres_func=eres_func, - pdf=pdf, - selection_string=selection_string, - dt_corr=dt_corr, - dep_correct=dep_correct, - dt_cut=dt_cut, - dt_param=dt_param, - high_cut_val=high_cut_val, - mean_func=mean_func, - sigma_func=sigma_func, - compt_bands_width=comptBands_width, - debug_mode=debug_mode | args.debug, - ) - - aoe.update_cal_dicts( - { - "AoE_Uncorr": { - "expression": f"{current_param}/{energy_param}", - "parameters": {}, - } - } - ) - - aoe.calibrate(data, "AoE_Uncorr") - log.info("Calibrated A/E") - return ( - cal_dicts, - get_results_dict(aoe), - fill_plot_dict(aoe, data, plot_options), - aoe, - ) - - -argparser = argparse.ArgumentParser() -argparser.add_argument("files", help="files", nargs="*", type=str) -argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) - -argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) -argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) -argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) - -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--log", help="log_file", 
type=str) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) - - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) -argparser.add_argument("--hit_pars", help="hit_pars", type=str) -argparser.add_argument("--aoe_results", help="aoe_results", type=str) - -argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_hit_aoecal"] - -log = build_log(config_dict, args.log) - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - -channel_dict = config_dict["inputs"]["aoecal_config"][args.channel] -kwarg_dict = Props.read_from(channel_dict) - - -ecal_dict = Props.read_from(args.ecal_file) -cal_dict = ecal_dict["pars"] -eres_dict = ecal_dict["results"]["ecal"] - -with Path(args.eres_file).open("rb") as o: - object_dict = pkl.load(o) - -if kwarg_dict["run_aoe"] is True: - kwarg_dict.pop("run_aoe") - - pdf = eval(kwarg_dict.pop("pdf")) if "pdf" in kwarg_dict else aoe_peak - - sigma_func = ( - eval(kwarg_dict.pop("sigma_func")) if "sigma_func" in kwarg_dict else SigmaFit - ) - - mean_func = eval(kwarg_dict.pop("mean_func")) if "mean_func" in kwarg_dict else Pol1 - - if "plot_options" in kwarg_dict: - for field, item in kwarg_dict["plot_options"].items(): - kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - - with Path(args.files[0]).open() as f: - files = f.read().splitlines() - files = sorted(files) - - try: - eres = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy() - - def eres_func(x): - return eval(eres["expression"], dict(x=x, **eres["parameters"])) - - except KeyError: - - def eres_func(x): - return x * np.nan - - params = [ - kwarg_dict["current_param"], - "tp_0_est", - "tp_99", - kwarg_dict["energy_param"], - kwarg_dict["cal_energy_param"], - kwarg_dict["cut_field"], - "timestamp", - ] - - if "dt_param" in kwarg_dict: - params += kwarg_dict["dt_param"] - else: - params += "dt_eff" - - if "dt_cut" in kwarg_dict and kwarg_dict["dt_cut"] is not None: - cal_dict.update(kwarg_dict["dt_cut"]["cut"]) - params.append(kwarg_dict["dt_cut"]["out_param"]) - - # load data in - data, threshold_mask = load_data( - files, - f"{channel}/dsp", - cal_dict, - params=params, - threshold=kwarg_dict.pop("threshold"), - return_selection_mask=True, - ) - - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) - - data["is_pulser"] = mask[threshold_mask] - - cal_dict, out_dict, plot_dict, obj = aoe_calibration( - data, - cal_dicts=cal_dict, - eres_func=eres_func, - 
selection_string=f"{kwarg_dict.pop('cut_field')}&(~is_pulser)", - pdf=pdf, - mean_func=mean_func, - sigma_func=sigma_func, - **kwarg_dict, - ) - obj.pdf = obj.pdf.name - - # need to change eres func as can't pickle lambdas - try: - obj.eres_func = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy() - except KeyError: - obj.eres_func = {} -else: - out_dict = {} - plot_dict = {} - obj = None - -if args.plot_file: - common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None - if args.inplots: - with Path(args.inplots).open("rb") as r: - out_plot_dict = pkl.load(r) - out_plot_dict.update({"aoe": plot_dict}) - else: - out_plot_dict = {"aoe": plot_dict} - - if "common" in list(out_plot_dict) and common_dict is not None: - out_plot_dict["common"].update(common_dict) - elif common_dict is not None: - out_plot_dict["common"] = common_dict - - Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) - with Path(args.plot_file).open("wb") as w: - pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - -Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) -results_dict = dict(**ecal_dict["results"], aoe=out_dict) -final_hit_dict = { - "pars": {"operations": cal_dict}, - "results": results_dict, -} - -final_hit_dict = convert_dict_np_to_float(final_hit_dict) - -Props.write_to(args.hit_pars, final_hit_dict) - -Path(args.aoe_results).parent.mkdir(parents=True, exist_ok=True) -final_object_dict = dict( - **object_dict, - aoe=obj, -) -with Path(args.aoe_results).open("wb") as w: - pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_hit_lq.py b/workflow/src/legenddataflow/scripts/pars_hit_lq.py deleted file mode 100644 index a7a2601..0000000 --- a/workflow/src/legenddataflow/scripts/pars_hit_lq.py +++ /dev/null @@ -1,283 +0,0 @@ -from __future__ import annotations - -import argparse -import pickle as pkl -import warnings -from pathlib import Path - -import numpy as np -import pandas as pd -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from pygama.math.distributions import gaussian -from pygama.pargen.AoE_cal import * # noqa: F403 -from pygama.pargen.data_cleaning import get_tcm_pulser_ids -from pygama.pargen.lq_cal import * # noqa: F403 -from pygama.pargen.lq_cal import LQCal -from pygama.pargen.utils import load_data - -from ..convert_np import convert_dict_np_to_float -from ..log import build_log - -warnings.filterwarnings(action="ignore", category=RuntimeWarning) - - -def get_results_dict(lq_class): - return { - "cal_energy_param": lq_class.cal_energy_param, - "DEP_means": lq_class.timecorr_df.to_dict("index"), - "rt_correction": lq_class.dt_fit_pars, - "cut_fit_pars": lq_class.cut_fit_pars.to_dict(), - "cut_value": lq_class.cut_val, - "sfs": lq_class.low_side_sf.to_dict("index"), - } - - -def fill_plot_dict(lq_class, data, plot_options, plot_dict=None): - if plot_dict is not None: - for key, item in plot_options.items(): - if item["options"] is not None: - plot_dict[key] = item["function"](lq_class, data, **item["options"]) - else: - plot_dict[key] = item["function"](lq_class, data) - else: - plot_dict = {} - return plot_dict - - -def lq_calibration( - data: pd.DataFrame, - cal_dicts: dict, - energy_param: str, - cal_energy_param: str, - dt_param: str, - eres_func: callable, - cdf: callable = gaussian, - selection_string: str = "", - plot_options: dict | None = None, - debug_mode: bool = False, -): - """Loads in data from the provided 
files and runs the LQ calibration on said files - - Parameters - ---------- - data: pd.DataFrame - A dataframe containing the data used for calibrating LQ - cal_dicts: dict - A dict of hit-level operations to apply to the data - energy_param: string - The energy parameter of choice. Used for normalizing the - raw lq values - cal_energy_param: string - The calibrated energy parameter of choice - dt_param: string - The drift time parameter of choice - eres_func: callable - The energy resolution functions - cdf: callable - The CDF used for the binned fitting of LQ distributions - selection_string: string - A string of flags to apply to the data when running the calibration - plot_options: dict - A dict containing the plot functions the user wants to run,and any - user options to provide those plot functions - - Returns - ------- - cal_dicts: dict - The user provided dict, updated with hit-level operations for LQ - results_dict: dict - A dict containing the results of the LQ calibration - plot_dict: dict - A dict containing all the figures specified by the plot options - lq: LQCal class - The LQCal object used for the LQ calibration - """ - - lq = LQCal( - cal_dicts, - cal_energy_param, - dt_param, - eres_func, - cdf, - selection_string, - debug_mode=debug_mode | args.debug, - ) - - data["LQ_Ecorr"] = np.divide(data["lq80"], data[energy_param]) - - lq.update_cal_dicts( - { - "LQ_Ecorr": { - "expression": f"lq80/{energy_param}", - "parameters": {}, - } - } - ) - - lq.calibrate(data, "LQ_Ecorr") - log.info("Calibrated LQ") - return cal_dicts, get_results_dict(lq), fill_plot_dict(lq, data, plot_options), lq - - -argparser = argparse.ArgumentParser() -argparser.add_argument("files", help="files", nargs="*", type=str) -argparser.add_argument("--pulser_file", help="pulser_file", type=str, required=False) -argparser.add_argument("--tcm_filelist", help="tcm_filelist", type=str, required=False) - -argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) -argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) -argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) - -argparser.add_argument("--configs", help="configs", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log_file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) -argparser.add_argument("--hit_pars", help="hit_pars", type=str) -argparser.add_argument("--lq_results", help="lq_results", type=str) - -argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_hit_lqcal"] - -log = build_log(config_dict, args.log) - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - - -channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] -kwarg_dict = Props.read_from(channel_dict) - -ecal_dict = Props.read_from(args.ecal_file) -cal_dict = ecal_dict["pars"]["operations"] -eres_dict = 
ecal_dict["results"]["ecal"] - -with Path(args.eres_file).open("rb") as o: - object_dict = pkl.load(o) - -if kwarg_dict["run_lq"] is True: - kwarg_dict.pop("run_lq") - - cdf = eval(kwarg_dict.pop("cdf")) if "cdf" in kwarg_dict else gaussian - - if "plot_options" in kwarg_dict: - for field, item in kwarg_dict["plot_options"].items(): - kwarg_dict["plot_options"][field]["function"] = eval(item["function"]) - - with Path(args.files[0]).open() as f: - files = f.read().splitlines() - files = sorted(files) - - try: - eres = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy() - - def eres_func(x): - return eval(eres["expression"], dict(x=x, **eres["parameters"])) - - except KeyError: - - def eres_func(x): - return x * np.nan - - params = [ - "lq80", - "dt_eff", - kwarg_dict["energy_param"], - kwarg_dict["cal_energy_param"], - kwarg_dict["cut_field"], - ] - - # load data in - data, threshold_mask = load_data( - files, - f"{channel}/dsp", - cal_dict, - params=params, - threshold=kwarg_dict.pop("threshold"), - return_selection_mask=True, - ) - - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) - - data["is_pulser"] = mask[threshold_mask] - - cal_dict, out_dict, plot_dict, obj = lq_calibration( - data, - selection_string=f"{kwarg_dict.pop('cut_field')}&(~is_pulser)", - cal_dicts=cal_dict, - eres_func=eres_func, - cdf=cdf, - **kwarg_dict, - ) - - # need to change eres func as can't pickle lambdas - try: - obj.eres_func = eres_dict[kwarg_dict["cal_energy_param"]]["eres_linear"].copy() - except KeyError: - obj.eres_func = {} -else: - out_dict = {} - plot_dict = {} - obj = None - -if args.plot_file: - common_dict = plot_dict.pop("common") if "common" in list(plot_dict) else None - if args.inplots: - with Path(args.inplots).open("rb") as r: - out_plot_dict = pkl.load(r) - out_plot_dict.update({"lq": plot_dict}) - else: - out_plot_dict = {"lq": plot_dict} - - if "common" in list(out_plot_dict) and common_dict is not None: - out_plot_dict["common"].update(common_dict) - elif common_dict is not None: - out_plot_dict["common"] = common_dict - - Path(args.plot_file).parent.mkdir(parents=True, exist_ok=True) - with Path(args.plot_file).open("wb") as w: - pkl.dump(out_plot_dict, w, protocol=pkl.HIGHEST_PROTOCOL) - - -final_hit_dict = convert_dict_np_to_float( - { - "pars": {"operations": cal_dict}, - "results": dict(**eres_dict, lq=out_dict), - } -) -Path(args.hit_pars).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.hit_pars, final_hit_dict) - -final_object_dict = dict( - **object_dict, - lq=obj, -) -Path(args.lq_results).parent.mkdir(parents=True, exist_ok=True) -with Path(args.lq_results).open("wb") as w: - pkl.dump(final_object_dict, w, protocol=pkl.HIGHEST_PROTOCOL) diff --git a/workflow/src/legenddataflow/scripts/pars_tcm_pulser.py b/workflow/src/legenddataflow/scripts/pars_tcm_pulser.py deleted file mode 100644 index ad46f0c..0000000 --- a/workflow/src/legenddataflow/scripts/pars_tcm_pulser.py +++ /dev/null @@ -1,57 +0,0 @@ -import argparse -import 
logging -from pathlib import Path - -import lgdo.lh5 as lh5 -import numpy as np -from dbetto import TextDB -from dbetto.catalog import Props -from legendmeta import LegendMetadata -from pygama.pargen.data_cleaning import get_tcm_pulser_ids - -from ..log import build_log - -argparser = argparse.ArgumentParser() -argparser.add_argument("--configs", help="configs path", type=str, required=True) -argparser.add_argument("--metadata", help="metadata", type=str, required=True) -argparser.add_argument("--log", help="log file", type=str) - -argparser.add_argument("--datatype", help="Datatype", type=str, required=True) -argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) -argparser.add_argument("--channel", help="Channel", type=str, required=True) - -argparser.add_argument("--pulser_file", help="pulser file", type=str, required=False) - -argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) -args = argparser.parse_args() - -configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) -config_dict = configs["snakemake_rules"]["pars_tcm_pulser"] - -log = build_log(config_dict, args.log) - -sto = lh5.LH5Store() -log = logging.getLogger(__name__) - - -kwarg_dict = config_dict["inputs"]["pulser_config"] -kwarg_dict = Props.read_from(kwarg_dict) - -meta = LegendMetadata(path=args.metadata) -channel_dict = meta.channelmap(args.timestamp, system=args.datatype) -channel = f"ch{channel_dict[args.channel].daq.rawid}" - -if isinstance(args.tcm_files, list) and args.tcm_files[0].split(".")[-1] == "filelist": - tcm_files = args.tcm_files[0] - with Path(tcm_files).open() as f: - tcm_files = f.read().splitlines() -else: - tcm_files = args.tcm_files -# get pulser mask from tcm files -tcm_files = sorted(np.unique(tcm_files)) -ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") -) - -Path(args.pulser_file).parent.mkdir(parents=True, exist_ok=True) -Props.write_to(args.pulser_file, {"idxs": ids.tolist(), "mask": mask.tolist()}) diff --git a/workflow/src/legenddataflow/scripts/tier/dsp.py b/workflow/src/legenddataflow/scripts/tier/dsp.py new file mode 100644 index 0000000..906985b --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/dsp.py @@ -0,0 +1,171 @@ +import argparse +import re +import time +import warnings +from pathlib import Path + +import numpy as np +from dbetto import TextDB +from dbetto.catalog import Props +from dspeed import build_dsp +from legendmeta import LegendMetadata +from lgdo import lh5 + +from ...log import build_log + +warnings.filterwarnings(action="ignore", category=RuntimeWarning) + + +def replace_list_with_array(dic): + for key, value in dic.items(): + if isinstance(value, dict): + dic[key] = replace_list_with_array(value) + elif isinstance(value, list): + dic[key] = np.array(value, dtype="float32") + else: + pass + return dic + + +def build_tier_dsp() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--configs", help="configs path", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--tier", help="Tier", type=str, required=True) + + argparser.add_argument( + "--pars_file", help="database file for detector", nargs="*", default=[] + ) 
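+    # descriptive note (editor's addition): of the files passed via --pars_file,
+    # only those with .json/.yaml/.yml suffixes are merged into the DSP parameter
+    # database further down in this function; other suffixes are ignored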
+ argparser.add_argument("--input", help="input file", type=str) + + argparser.add_argument("--output", help="output file", type=str) + argparser.add_argument("--db_file", help="db file", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True) + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"] + if args.tier in ["dsp", "psp"]: + config_dict = config_dict["tier_dsp"] + elif args.tier in ["ann", "pan"]: + config_dict = config_dict["tier_ann"] + else: + msg = f"Tier {args.tier} not supported" + raise ValueError(msg) + + log = build_log(config_dict, args.log) + + channel_dict = config_dict["inputs"]["processing_chain"] + settings_dict = config_dict["options"].get("settings", {}) + if isinstance(settings_dict, str): + settings_dict = Props.read_from(settings_dict) + + meta = LegendMetadata(path=args.metadata) + chan_map = meta.channelmap(args.timestamp, system=args.datatype) + + if args.tier in ["ann", "pan"]: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/dsp": Props.read_from(file) + for chan, file in channel_dict.items() + } + else: + channel_dict = { + f"ch{chan_map[chan].daq.rawid:07}/raw": Props.read_from(file) + for chan, file in channel_dict.items() + } + db_files = [ + par_file + for par_file in args.pars_file + if Path(par_file).suffix in (".json", ".yaml", ".yml") + ] + + database_dic = Props.read_from(db_files, subst_pathvar=True) + database_dic = replace_list_with_array(database_dic) + + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + + rng = np.random.default_rng() + rand_num = f"{rng.integers(0, 99999):05d}" + temp_output = f"{args.output}.{rand_num}" + + start = time.time() + + build_dsp( + args.input, + temp_output, + {}, + database=database_dic, + chan_config=channel_dict, + write_mode="r", + buffer_len=settings_dict.get("buffer_len", 1000), + block_width=settings_dict.get("block_width", 16), + ) + + log.info(f"build_dsp finished in {time.time()-start}") + Path(temp_output).rename(args.output) + + key = Path(args.output).name.replace(f"-tier_{args.tier}.lh5", "") + + if args.tier in ["dsp", "psp"]: + raw_channels = [ + channel for channel in lh5.ls(args.input) if re.match("(ch\\d{7})", channel) + ] + raw_fields = [ + field.split("/")[-1] + for field in lh5.ls(args.input, f"{raw_channels[0]}/raw/") + ] + + outputs = {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "raw": {"group1": {"fields": raw_fields, "channels": raw_channels}}, + "dsp": outputs, + }, + "valid_keys": { + key: {"valid_channels": {"raw": raw_channels, "dsp": channels}} + }, + } + else: + outputs = {} + channels = [] + for channel, chan_dict in channel_dict.items(): + output = chan_dict["outputs"] + in_dict = False + for entry in outputs: + if outputs[entry]["fields"] == output: + outputs[entry]["channels"].append(channel.split("/")[0]) + in_dict = True + if in_dict is False: + outputs[f"group{len(list(outputs))+1}"] = { + "channels": [channel.split("/")[0]], + "fields": output, + } + channels.append(channel.split("/")[0]) + + full_dict = { + "valid_fields": { + "ann": outputs, + }, + "valid_keys": {key: 
{"valid_channels": {"ann": channels}}}, + } + + Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.db_file, full_dict) diff --git a/workflow/src/legenddataflow/scripts/tier/evt.py b/workflow/src/legenddataflow/scripts/tier/evt.py new file mode 100644 index 0000000..15a76d1 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/evt.py @@ -0,0 +1,187 @@ +import argparse +import json +import time +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +from dbetto import Props, TextDB +from legendmeta import LegendMetadata +from lgdo.types import Array +from pygama.evt import build_evt + +from ...log import build_log + +sto = lh5.LH5Store() + + +def find_matching_values_with_delay(arr1, arr2, jit_delay): + matching_values = [] + + # Create an array with all possible delay values + delays = np.arange(0, int(1e9 * jit_delay)) * jit_delay + + for delay in delays: + arr2_delayed = arr2 + delay + + # Find matching values and indices + mask = np.isin(arr1, arr2_delayed, assume_unique=True) + matching_values.extend(arr1[mask]) + + return np.unique(matching_values) + + +def build_tier_evt() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--hit_file", help="hit file", type=str) + argparser.add_argument("--dsp_file", help="dsp file", type=str) + argparser.add_argument("--tcm_file", help="tcm file", type=str) + argparser.add_argument("--ann_file", help="ann file") + argparser.add_argument("--xtc_file", help="xtc file", type=str) + argparser.add_argument("--par_files", help="par files", nargs="*") + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--tier", help="Tier", type=str, required=True) + + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata path", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + + argparser.add_argument("--output", help="output file", type=str) + args = argparser.parse_args() + + # load in config + configs = TextDB(args.configs, lazy=True) + if args.tier in ("evt", "pet"): + rule_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "tier_evt" + ] + + else: + msg = "unknown tier" + raise ValueError(msg) + + config_dict = rule_dict["inputs"] + evt_config_file = config_dict["evt_config"] + + log = build_log(rule_dict, args.log) + + meta = LegendMetadata(args.metadata, lazy=True) + chmap = meta.channelmap(args.timestamp) + + evt_config = Props.read_from(evt_config_file) + + if args.datatype in ("phy", "xtc"): + exp_string = evt_config["operations"]["geds___energy"]["expression"] + exp_string = exp_string.replace( + 'xtalk_matrix_filename=""', f'xtalk_matrix_filename="{args.xtc_file}"' + ) + exp_string = exp_string.replace( + 'cal_par_files=""', f"cal_par_files={args.par_files}" + ) + exp_string2 = exp_string.replace( + 'return_mode="energy"', 'return_mode="tcm_index"' + ) + + file_path_config = { + "operations": { + "geds___energy": {"expression": exp_string}, + "_geds___tcm_idx": {"expression": exp_string2}, + } + } + + log.debug(json.dumps(file_path_config, indent=2)) + + Props.add_to(evt_config, file_path_config) + + # block for snakemake to fill in channel lists + for field, dic in evt_config["channels"].items(): + if isinstance(dic, dict): + chans = chmap.map("system", unique=False)[dic["system"]] + if "selectors" in 
dic: + try: + for k, val in dic["selectors"].items(): + chans = chans.map(k, unique=False)[val] + except KeyError: + chans = None + if chans is not None: + chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] + else: + chans = [] + evt_config["channels"][field] = chans + + log.debug(json.dumps(evt_config["channels"], indent=2)) + + t_start = time.time() + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + + file_table = { + "tcm": (args.tcm_file, "hardware_tcm_1", "ch{}"), + "dsp": (args.dsp_file, "dsp", "ch{}"), + "hit": (args.hit_file, "hit", "ch{}"), + "evt": (None, "evt"), + } + + if args.ann_file is not None: + file_table["ann"] = (args.ann_file, "dsp", "ch{}") + + table = build_evt( + file_table, + evt_config, + ) + + if "muon_config" in config_dict and config_dict["muon_config"] is not None: + muon_config = Props.read_from(config_dict["muon_config"]["evt_config"]) + field_config = Props.read_from(config_dict["muon_config"]["field_config"]) + # block for snakemake to fill in channel lists + for field, dic in muon_config["channels"].items(): + if isinstance(dic, dict): + chans = chmap.map("system", unique=False)[dic["system"]] + if "selectors" in dic: + try: + for k, val in dic["selectors"].items(): + chans = chans.map(k, unique=False)[val] + except KeyError: + chans = None + if chans is not None: + chans = [f"ch{chan}" for chan in list(chans.map("daq.rawid"))] + else: + chans = [] + muon_config["channels"][field] = chans + + trigger_timestamp = table[field_config["ged_timestamp"]["table"]][ + field_config["ged_timestamp"]["field"] + ].nda + if "hardware_tcm_2" in lh5.ls(args.tcm_file): + muon_table = build_evt( + { + "tcm": (args.tcm_file, "hardware_tcm_2", "ch{}"), + "dsp": (args.dsp_file, "dsp", "ch{}"), + "hit": (args.hit_file, "hit", "ch{}"), + "evt": (None, "evt"), + }, + muon_config, + ) + + muon_timestamp = muon_table[field_config["muon_timestamp"]["field"]].nda + muon_tbl_flag = muon_table[field_config["muon_flag"]["field"]].nda + if len(muon_timestamp[muon_tbl_flag]) > 0: + is_muon_veto_triggered = find_matching_values_with_delay( + trigger_timestamp, + muon_timestamp[muon_tbl_flag], + field_config["jitter"], + ) + muon_flag = np.isin(trigger_timestamp, is_muon_veto_triggered) + else: + muon_flag = np.zeros(len(trigger_timestamp), dtype=bool) + else: + muon_flag = np.zeros(len(trigger_timestamp), dtype=bool) + table[field_config["output_field"]["table"]].add_column( + field_config["output_field"]["field"], Array(muon_flag) + ) + + sto.write(obj=table, name="evt", lh5_file=args.output, wo_mode="a") + + t_elap = time.time() - t_start + log.info(f"Done! 
Time elapsed: {t_elap:.2f} sec.") diff --git a/workflow/src/legenddataflow/scripts/tier/hit.py b/workflow/src/legenddataflow/scripts/tier/hit.py new file mode 100644 index 0000000..9fd489f --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/hit.py @@ -0,0 +1,98 @@ +import argparse +import time +from pathlib import Path + +from dbetto.catalog import Props +from legendmeta import LegendMetadata, TextDB +from lgdo import lh5 +from pygama.hit.build_hit import build_hit + +from ...log import build_log + + +def build_tier_hit() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--input", help="input file", type=str) + argparser.add_argument("--pars_file", help="hit pars file", nargs="*") + + argparser.add_argument("--configs", help="configs", type=str, required=True) + argparser.add_argument("--metadata", help="metadata", type=str, required=True) + argparser.add_argument("--log", help="log_file", type=str) + + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--tier", help="Tier", type=str, required=True) + + argparser.add_argument("--output", help="output file", type=str) + argparser.add_argument("--db_file", help="db file", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True) + if args.tier == "hit" or args.tier == "pht": + config_dict = configs.on(args.timestamp, system=args.datatype)[ + "snakemake_rules" + ]["tier_hit"] + else: + msg = "unknown tier" + raise ValueError(msg) + + log = build_log(config_dict, args.log) + + channel_dict = config_dict["inputs"]["hit_config"] + settings_dict = config_dict["options"].get("settings", {}) + if isinstance(settings_dict, str): + settings_dict = Props.read_from(settings_dict) + + meta = LegendMetadata(path=args.metadata) + chan_map = meta.channelmap(args.timestamp, system=args.datatype) + + pars_dict = Props.read_from(args.pars_file) + pars_dict = {chan: chan_dict["pars"] for chan, chan_dict in pars_dict.items()} + + hit_dict = {} + channels_present = lh5.ls(args.input) + for channel in pars_dict: + chan_pars = pars_dict[channel].copy() + try: + detector = chan_map.map("daq.rawid")[int(channel[2:])].name + if detector in channel_dict: + cfg_dict = Props.read_from(channel_dict[detector]) + Props.add_to(cfg_dict, chan_pars) + chan_pars = cfg_dict + + if channel in channels_present: + hit_dict[f"{channel}/dsp"] = chan_pars + except KeyError: + pass + + t_start = time.time() + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + build_hit(args.input, lh5_tables_config=hit_dict, outfile=args.output) + t_elap = time.time() - t_start + log.info(f"Done! 
Time elapsed: {t_elap:.2f} sec.") + + hit_outputs = {} + hit_channels = [] + for channel, file in channel_dict.items(): + output = Props.read_from(file)["outputs"] + in_dict = False + for entry in hit_outputs: + if hit_outputs[entry]["fields"] == output: + hit_outputs[entry]["channels"].append(channel) + in_dict = True + if in_dict is False: + hit_outputs[f"group{len(list(hit_outputs))+1}"] = { + "channels": [channel], + "fields": output, + } + hit_channels.append(channel) + + key = args.output.replace(f"-tier_{args.tier}.lh5", "") + + full_dict = { + "valid_fields": {args.tier: hit_outputs}, + "valid_keys": {key: {"valid_channels": {args.tier: hit_channels}}}, + } + + Path(args.db_file).parent.mkdir(parents=True, exist_ok=True) + Props.write_to(args.db_file, full_dict) diff --git a/workflow/src/legenddataflow/scripts/tier/raw_blind.py b/workflow/src/legenddataflow/scripts/tier/raw_blind.py new file mode 100644 index 0000000..19eb023 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/raw_blind.py @@ -0,0 +1,185 @@ +""" +This script takes in raw data, applies the calibration to the daqenergy +and uses this to blind the data in a window of Qbb +- 25 keV. It copies over all +channels in a raw file, removing those events that fall within the ROI for Ge detectors +that have a daqenergy calibration curve and are not anti-coincidence only (AC). It removes +the whole event from all of the Ge and SiPM channels. + +In the Snakemake dataflow, this script only runs if the checkfile is found on disk, +but this is controlled by the Snakemake flow (presumably an error is thrown if the file +is not found). This script itself does not check for the existence of such a file. +""" + +import argparse +from pathlib import Path + +import numexpr as ne +import numpy as np +from dbetto.catalog import Props +from legendmeta import LegendMetadata, TextDB +from lgdo import lh5 + +from ...log import build_log + + +def build_tier_raw_blind() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--input", help="input file", type=str) + argparser.add_argument("--output", help="output file", type=str) + argparser.add_argument( + "--blind_curve", help="blinding curves file", type=str, required=True, nargs="*" + ) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--configs", help="config file", type=str) + argparser.add_argument("--chan_maps", help="chan map", type=str) + argparser.add_argument("--metadata", help="metadata", type=str) + argparser.add_argument("--log", help="log file", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True) + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "tier_raw" + ] + + build_log(config_dict, args.log) + + hdf_settings = Props.read_from(config_dict["settings"])["hdf5_settings"] + blinding_settings = Props.read_from(config_dict["config"]) + + centroid = blinding_settings["centroid_in_keV"] # keV + width = blinding_settings["width_in_keV"] # keV + + # list of all channels and objects in the raw file + all_channels = lh5.ls(args.input) + + # list of Ge channels and SiPM channels with associated metadata + legendmetadata = LegendMetadata(args.metadata, lazy=True) + ged_channels = ( + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["geds"] + .map("daq.rawid") + ) + spms_channels = ( + legendmetadata.channelmap(args.timestamp) + 
.map("system", unique=False)["spms"] + .map("daq.rawid") + ) + auxs_channels = ( + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["auxs"] + .map("daq.rawid") + ) + blsn_channels = ( + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["bsln"] + .map("daq.rawid") + ) + puls_channels = ( + legendmetadata.channelmap(args.timestamp) + .map("system", unique=False)["puls"] + .map("daq.rawid") + ) + + store = lh5.LH5Store() + + # rows that need blinding + toblind = np.array([]) + + # first, loop through the Ge detector channels, calibrate them and look for events that should be blinded + for chnum in list(ged_channels): + # skip Ge detectors that are anti-coincidence only or not able to be blinded for some other reason + if ged_channels[chnum]["analysis"]["is_blinded"] is False: + continue + + # load in just the daqenergy for now + daqenergy, _ = store.read(f"ch{chnum}/raw/daqenergy", args.input) + + # read in calibration curve for this channel + blind_curve = Props.read_from(args.blind_curve)[f"ch{chnum}"]["pars"][ + "operations" + ] + + # calibrate daq energy using pre existing curve + daqenergy_cal = ne.evaluate( + blind_curve["daqenergy_cal"]["expression"], + local_dict=dict( + daqenergy=daqenergy, **blind_curve["daqenergy_cal"]["parameters"] + ), + ) + + # figure out which event indices should be blinded + toblind = np.append( + toblind, + np.nonzero(np.abs(np.asarray(daqenergy_cal) - centroid) <= width)[0], + ) + + # remove duplicates + toblind = np.unique(toblind) + + # total number of events (from last Ge channel loaded, should be same for all Ge channels) + allind = np.arange(len(daqenergy)) + + # gets events that should not be blinded + tokeep = allind[np.logical_not(np.isin(allind, toblind))] + + # make some temp file to write the output to before renaming it + rng = np.random.default_rng() + rand_num = f"{rng.integers(0,99999):05d}" + temp_output = f"{args.output}.{rand_num}" + Path(temp_output).parent.mkdir(parents=True, exist_ok=True) + + for channel in all_channels: + try: + chnum = int(channel[2::]) + except ValueError: + # if this isn't an interesting channel, just copy it to the output file + chobj, _ = store.read(channel, args.input, decompress=False) + store.write_object( + chobj, + channel, + lh5_file=temp_output, + wo_mode="w", + **hdf_settings, + ) + continue + + if ( + (chnum not in list(ged_channels)) + and (chnum not in list(spms_channels)) + and (chnum not in list(auxs_channels)) + and (chnum not in list(blsn_channels)) + and (chnum not in list(puls_channels)) + ): + # if this is a PMT or not included for some reason, just copy it to the output file + chobj, _ = store.read(channel + "/raw", args.input, decompress=False) + store.write_object( + chobj, + group=channel, + name="raw", + lh5_file=temp_output, + wo_mode="w", + **hdf_settings, + ) + continue + + # the rest should be the Ge and SiPM channels that need to be blinded + + # read in all of the data but only for the unblinded events + blinded_chobj, _ = store.read( + channel + "/raw", args.input, idx=tokeep, decompress=False + ) + + # now write the blinded data for this channel + store.write_object( + blinded_chobj, + group=channel, + name="raw", + lh5_file=temp_output, + wo_mode="w", + **hdf_settings, + ) + + # rename the temp file + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/tier/raw_fcio.py b/workflow/src/legenddataflow/scripts/tier/raw_fcio.py new file mode 
100644 index 0000000..fefc8a1 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/raw_fcio.py @@ -0,0 +1,72 @@ +import argparse +from copy import deepcopy +from pathlib import Path + +import numpy as np +from daq2lh5 import build_raw +from dbetto import TextDB +from dbetto.catalog import Props + +from ...log import build_log + + +def build_tier_raw_fcio() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("input", help="input file", type=str) + argparser.add_argument("output", help="output file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--configs", help="config file", type=str) + argparser.add_argument("--chan_maps", help="chan map", type=str) + argparser.add_argument("--log", help="log file", type=str) + args = argparser.parse_args() + + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + + config_dict = ( + TextDB(args.configs, lazy=True) + .on(args.timestamp, system=args.datatype) + .snakemake_rules.tier_raw_fcio + ) + + build_log(config_dict, args.log) + + channel_dict = config_dict.inputs + settings = Props.read_from(channel_dict.settings) + channel_dict = channel_dict.out_spec + all_config = Props.read_from(channel_dict.gen_config) + + chmap = ( + TextDB(args.chan_maps, lazy=True).channelmaps.on(args.timestamp).group("system") + ) + + if "geds_config" in channel_dict: + raise NotImplementedError() + + if "spms_config" in channel_dict: + spm_config = Props.read_from(channel_dict.spms_config) + spm_channels = chmap.spms.map("daq.rawid") + + for rawid, chinfo in spm_channels.items(): + cfg_block = deepcopy(spm_config["FCEventDecoder"]["__output_table_name__"]) + cfg_block["key_list"] = [chinfo.daq.fc_channel] + spm_config["FCEventDecoder"][f"ch{rawid:07d}/raw"] = cfg_block + + spm_config["FCEventDecoder"].pop("__output_table_name__") + + Props.add_to(all_config, spm_config) + + if "auxs_config" in channel_dict: + raise NotImplementedError() + + if "muon_config" in channel_dict: + raise NotImplementedError() + + rng = np.random.default_rng() + rand_num = f"{rng.integers(0,99999):05d}" + temp_output = f"{args.output}.{rand_num}" + + build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) + + # rename the temp file + Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/tier/raw_orca.py b/workflow/src/legenddataflow/scripts/tier/raw_orca.py new file mode 100644 index 0000000..00d7751 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/raw_orca.py @@ -0,0 +1,110 @@ +import argparse +import logging +from pathlib import Path + +import numpy as np +from daq2lh5 import build_raw +from dbetto import TextDB +from dbetto.catalog import Props + +from ...log import build_log + + +def build_tier_raw_orca() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("input", help="input file", type=str) + argparser.add_argument("output", help="output file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--configs", help="config file", type=str) + argparser.add_argument("--chan_maps", help="chan map", type=str) + argparser.add_argument("--log", help="log file") + args = argparser.parse_args() + + Path(args.log).parent.mkdir(parents=True, exist_ok=True) + 
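# --- editor's note: illustrative sketch, not part of this patch -------------
# The geds/spms/auxs/muon blocks further down all follow the same recipe:
# take the generic decoder block from gen_config, overwrite its "key_list"
# with the sorted rawids of one detector system from the channel map, and
# merge the result into all_config via Props.add_to before calling build_raw.
# A minimal illustration with plain dicts and made-up decoder/rawid names
# (no legendmeta or daq2lh5 calls; "SomeOrcaDecoder" is hypothetical):

def _fill_key_list_sketch() -> dict:
    gen_config = {"SomeOrcaDecoder": {"geds": {"key_list": []}}}
    system_rawids = {"geds": [1104002, 1104000, 1104001]}  # hypothetical rawids

    decoder = next(iter(gen_config))  # same next(iter(...)) trick used below
    gen_config[decoder]["geds"]["key_list"] = sorted(system_rawids["geds"])
    return gen_config  # ready to be merged into the full out_spec

# -----------------------------------------------------------------------------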
logging.basicConfig(level=logging.INFO, filename=args.log, filemode="w") + + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + + configs = TextDB(args.configs, lazy=True) + config_dict = configs.on(args.timestamp, system=args.datatype)["snakemake_rules"][ + "tier_raw" + ] + + build_log(config_dict, args.log) + + channel_dict = config_dict["inputs"] + settings = Props.read_from(channel_dict["settings"]) + channel_dict = channel_dict["out_spec"] + all_config = Props.read_from(channel_dict["gen_config"]) + + chmap = TextDB(args.chan_maps, lazy=True) + + if "geds_config" in list(channel_dict): + ged_config = Props.read_from(channel_dict["geds_config"]) + + ged_channels = list( + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["geds"] + .map("daq.rawid") + ) + + ged_config[next(iter(ged_config))]["geds"]["key_list"] = sorted(ged_channels) + Props.add_to(all_config, ged_config) + + if "spms_config" in list(channel_dict): + spm_config = Props.read_from(channel_dict["spms_config"]) + + spm_channels = list( + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["spms"] + .map("daq.rawid") + ) + + spm_config[next(iter(spm_config))]["spms"]["key_list"] = sorted(spm_channels) + Props.add_to(all_config, spm_config) + + if "auxs_config" in list(channel_dict): + aux_config = Props.read_from(channel_dict["auxs_config"]) + aux_channels = list( + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["auxs"] + .map("daq.rawid") + ) + aux_channels += list( + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["puls"] + .map("daq.rawid") + ) + aux_channels += list( + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["bsln"] + .map("daq.rawid") + ) + top_key = next(iter(aux_config)) + aux_config[top_key][next(iter(aux_config[top_key]))]["key_list"] = sorted( + aux_channels + ) + Props.add_to(all_config, aux_config) + + if "muon_config" in list(channel_dict): + muon_config = Props.read_from(channel_dict["muon_config"]) + muon_channels = list( + chmap.channelmaps.on(args.timestamp) + .map("system", unique=False)["muon"] + .map("daq.rawid") + ) + top_key = next(iter(muon_config)) + muon_config[top_key][next(iter(muon_config[top_key]))]["key_list"] = sorted( + muon_channels + ) + Props.add_to(all_config, muon_config) + + rng = np.random.default_rng() + rand_num = f"{rng.integers(0,99999):05d}" + temp_output = f"{args.output}.{rand_num}" + + build_raw(args.input, out_spec=all_config, filekey=temp_output, **settings) + + # rename the temp file + Path(temp_output).rename(args.output) diff --git a/workflow/src/legenddataflow/scripts/tier/skm.py b/workflow/src/legenddataflow/scripts/tier/skm.py new file mode 100644 index 0000000..a698629 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/skm.py @@ -0,0 +1,96 @@ +import argparse + +import awkward as ak +from dbetto import TextDB +from dbetto.catalog import Props +from lgdo import lh5 +from lgdo.types import Array, Struct, Table, VectorOfVectors + +from ...log import build_log + + +def get_all_out_fields(input_table, out_fields, current_field=""): + for key in input_table: + field = input_table[key] + key_string = f"{current_field}.{key}" + if isinstance(field, (Table, Struct)): + get_all_out_fields(field, out_fields, key_string) + else: + if key_string not in out_fields: + out_fields.append(key_string) + return out_fields + + +def build_tier_skm() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("--evt_file", help="evt file", required=True) + 
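# --- editor's note: illustrative sketch, not part of this patch -------------
# The "keep_fields" entries processed further down address nested tables with
# dotted paths (e.g. "geds.energy", a hypothetical name); the loops below walk
# those paths through the LGDO Table and rebuild the same nesting in the
# output. The traversal on plain nested dicts reduces to:

def _copy_dotted_field_sketch(src: dict, dst: dict, field: str) -> None:
    *parents, leaf = field.split(".")
    ptr_in, ptr_out = src, dst
    for name in parents:
        ptr_in = ptr_in[name]
        ptr_out = ptr_out.setdefault(name, {})  # create intermediate "tables"
    ptr_out[leaf] = ptr_in[leaf]  # copy the leaf column across

# e.g. _copy_dotted_field_sketch({"geds": {"energy": [1, 2]}}, out := {}, "geds.energy")
# leaves out == {"geds": {"energy": [1, 2]}}
# -----------------------------------------------------------------------------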
argparser.add_argument("--configs", help="configs", required=True) + argparser.add_argument("--datatype", help="datatype", required=True) + argparser.add_argument("--timestamp", help="timestamp", required=True) + argparser.add_argument("--log", help="log file", default=None) + argparser.add_argument("--output", help="output file", required=True) + args = argparser.parse_args() + + # load in config + config_dict = TextDB(args.configs, lazy=True).on( + args.timestamp, system=args.datatype + )["snakemake_rules"]["tier_skm"] + + build_log(config_dict, args.log) + + skm_config_file = config_dict["inputs"]["skm_config"] + evt_filter = Props.read_from(skm_config_file)["evt_filter"] + out_fields = Props.read_from(skm_config_file)["keep_fields"] + + store = lh5.LH5Store() + + evt = lh5.read_as("evt", args.evt_file, "ak") # noqa: F841 + + # remove unwanted events + skm = eval(f"evt[{evt_filter}]") + # make it rectangular and make an LGDO Table + out_table = Table(skm) + + for field in out_fields: + items = field.split(".") + ptr1 = out_table + for item in items[:-1]: + ptr1 = ptr1[item] + + if isinstance(ptr1[items[-1]], Table): + out_fields.remove(field) + out_fields = get_all_out_fields( + ptr1[items[-1]], out_fields, current_field=field + ) + + # remove unwanted columns + out_table_skm = Table(size=len(out_table)) + for field in out_fields: + # table nesting is labeled by '.' in the config + items = field.split(".") + # get to actual nested field recursively + ptr1 = out_table + ptr2 = out_table_skm + for item in items[:-1]: + # make intermediate tables in new table + if item not in ptr2: + ptr2.add_field(item, Table(size=len(out_table))) + # get non-table LGDO recursively + ptr1 = ptr1[item] + ptr2 = ptr2[item] + + # finally add column to new table + if isinstance(ptr1[items[-1]], VectorOfVectors): + ptr2.add_field(items[-1], Array(ak.flatten(ptr1[items[-1]].view_as("ak")))) + else: + ptr2.add_field(items[-1], ptr1[items[-1]]) + attrs = ptr1[items[-1]].attrs + + # forward LGDO attributes + # attrs = evt[field.replace(".", "_")].attrs + for attr, val in attrs.items(): + if attr != "datatype": + ptr2.attrs[attr] = val + + # write-append to disk + store.write(out_table_skm, "skm", args.output, wo_mode="w") diff --git a/workflow/src/legenddataflow/scripts/tier/tcm.py b/workflow/src/legenddataflow/scripts/tier/tcm.py new file mode 100644 index 0000000..6f53b1f --- /dev/null +++ b/workflow/src/legenddataflow/scripts/tier/tcm.py @@ -0,0 +1,55 @@ +import argparse +from pathlib import Path + +import lgdo.lh5 as lh5 +import numpy as np +from daq2lh5.orca import orca_flashcam +from dbetto import TextDB +from dbetto.catalog import Props +from pygama.evt.build_tcm import build_tcm + +from ...log import build_log + + +def build_tier_tcm() -> None: + argparser = argparse.ArgumentParser() + argparser.add_argument("input", help="input file", type=str) + argparser.add_argument("output", help="output file", type=str) + argparser.add_argument("--datatype", help="Datatype", type=str, required=True) + argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) + argparser.add_argument("--configs", help="config file", type=str) + argparser.add_argument("--log", help="log file", type=str) + args = argparser.parse_args() + + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) + config_dict = configs["snakemake_rules"]["tier_tcm"] + + build_log(config_dict, args.log) + + settings = Props.read_from(config_dict["inputs"]["config"]) + + rng = np.random.default_rng() + 
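# --- editor's note: illustrative sketch, not part of this patch -------------
# The loop below buckets every "chNNNNNNN" table by its FlashCam crate id
# (fcid) so that one hardware_tcm_<fcid> is built per crate. The grouping is
# plain Python; with a stand-in for get_fcid (the real one is imported from
# daq2lh5.orca.orca_flashcam above) it reduces to:

def _group_by_fcid_sketch(ch_list: list[str]) -> dict[int, list[str]]:
    def fake_get_fcid(rawid: int) -> int:
        return rawid // 100000  # hypothetical mapping, NOT the daq2lh5 logic

    groups: dict[int, list[str]] = {}
    for ch in ch_list:
        fcid = fake_get_fcid(int(ch[2:]))  # "ch1104000" -> 1104000
        groups.setdefault(fcid, []).append(f"/{ch}/raw")
    return groups

# e.g. _group_by_fcid_sketch(["ch1104000", "ch1104001"]) == {11: ["/ch1104000/raw", "/ch1104001/raw"]}
# -----------------------------------------------------------------------------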
temp_output = f"{args.output}.{rng.integers(0, 99999):05d}" + Path(args.output).parent.mkdir(parents=True, exist_ok=True) + + # get the list of channels by fcid + ch_list = lh5.ls(args.input, "/ch*") + fcid_channels = {} + for ch in ch_list: + key = int(ch[2:]) + fcid = orca_flashcam.get_fcid(key) + if fcid not in fcid_channels: + fcid_channels[fcid] = [] + fcid_channels[fcid].append(f"/{ch}/raw") + + # make a hardware_tcm_[fcid] for each fcid + for fcid, fcid_dict in fcid_channels.items(): + build_tcm( + [(args.input, fcid_dict)], + out_file=temp_output, + out_name=f"hardware_tcm_{fcid}", + wo_mode="o", + **settings, + ) + + Path(temp_output).rename(args.output) From b2b5ec6b98a85daa3f89c1102471d820a5f057eb Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 00:07:27 +0100 Subject: [PATCH 079/101] debugging --- workflow/src/legenddataflow/FileKey.py | 23 ++++++++++++++-- workflow/src/legenddataflow/cal_grouping.py | 29 +++++++++++++-------- 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/workflow/src/legenddataflow/FileKey.py b/workflow/src/legenddataflow/FileKey.py index c11e6e5..1e33b96 100644 --- a/workflow/src/legenddataflow/FileKey.py +++ b/workflow/src/legenddataflow/FileKey.py @@ -5,6 +5,7 @@ import re import string from collections import namedtuple +from itertools import product from pathlib import Path from .patterns import ( @@ -21,10 +22,16 @@ def regex_from_filepattern(filepattern): f = [] + wildcards = [] last = 0 for match in re.compile(r"\{(?P[\w]+)\}").finditer(filepattern): + f.append(re.escape(filepattern[last : match.start()])) wildcard = match.group("name") - f.append(f"(?P={wildcard})") + if wildcard in wildcards: + f.append(f"(?P={wildcard})") + else: + wildcards.append(wildcard) + f.append(f"(?P<{wildcard}>.+)") last = match.end() f.append(re.escape(filepattern[last:])) f.append("$") @@ -101,9 +108,21 @@ def parse_keypart(cls, keypart): return cls(**d) def expand(self, file_pattern, **kwargs): + if isinstance(file_pattern, Path): + file_pattern = file_pattern.as_posix() wildcard_dict = dict(**self._asdict(), **kwargs) + wildcard_dict = { + wildcard: [wildcard_value] + if isinstance(wildcard_value, str) + else wildcard_value + for wildcard, wildcard_value in wildcard_dict.items() + } formatter = string.Formatter() - return [formatter.vformat(file_pattern, (), wildcard_dict)] + result = [] + for combo in product(*wildcard_dict.values()): + substitution = dict(zip(list(wildcard_dict), combo)) + result.append(formatter.vformat(file_pattern, (), substitution)) + return result def get_path_from_filekey(self, pattern, **kwargs): if kwargs is None: diff --git a/workflow/src/legenddataflow/cal_grouping.py b/workflow/src/legenddataflow/cal_grouping.py index 5c19ea7..13145d7 100644 --- a/workflow/src/legenddataflow/cal_grouping.py +++ b/workflow/src/legenddataflow/cal_grouping.py @@ -85,7 +85,7 @@ def get_par_files( ): all_par_files.append(par_file) if channel == "default": - channel = "{channel}" + channel = "{detector}" selected_par_files = [] for par_file in all_par_files: fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) @@ -138,7 +138,7 @@ def get_plt_files( ): all_par_files.append(par_file) if channel == "default": - channel = "{channel}" + channel = "{detector}" selected_par_files = [] for par_file in all_par_files: fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) @@ -184,14 +184,17 @@ def get_log_file( datatype=datatype, name=name, ) - fk = 
ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) - if channel == "default": - fk.channel = "{channel}" + if len(par_files) > 0: + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) + if channel == "default": + fk.channel = "{detector}" + else: + fk.channel = channel + return fk.get_path_from_filekey( + get_pattern_log_channel(self.setup, name, processing_timestamp) + )[0] else: - fk.channel = channel - return fk.get_path_from_filekey( - get_pattern_log_channel(self.setup, name, processing_timestamp) - )[0] + return "/tmp/log.log" def get_timestamp( self, catalog, dataset, channel, tier, experiment="l200", datatype="cal" @@ -205,8 +208,12 @@ def get_timestamp( datatype=datatype, name=None, ) - fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) - return fk.timestamp + + if len(par_files) > 0: + fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) + return fk.timestamp + else: + return "20200101T000000Z" def get_wildcard_constraints(self, dataset, channel): if channel == "default": From 16128845edc10e22605a3635203c4a338f376986 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 00:08:41 +0100 Subject: [PATCH 080/101] add channel merge rules --- workflow/rules/channel_merge.smk | 156 +++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) create mode 100644 workflow/rules/channel_merge.smk diff --git a/workflow/rules/channel_merge.smk b/workflow/rules/channel_merge.smk new file mode 100644 index 0000000..f717eda --- /dev/null +++ b/workflow/rules/channel_merge.smk @@ -0,0 +1,156 @@ +from scripts.util.patterns import ( + get_pattern_pars_tmp_channel, + get_pattern_plts_tmp_channel, + get_pattern_plts, + get_pattern_tier, + get_pattern_pars_tmp, + get_pattern_pars, +) +from scripts.util.utils import set_last_rule_name +import inspect + +def build_merge_rules(tier,lh5_merge=False): + rule: + input: + lambda wildcards: get_plt_chanlist( + setup, + f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", + tier, + basedir, + det_status, + chan_maps, + ), + params: + timestamp="{timestamp}", + datatype="cal", + output: + get_pattern_plts(setup, tier), + group: + f"merge-{tier}" + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + "--channelmap {meta} " + + set_last_rule_name(workflow, f"build_plts_{tier}") + + rule: + input: + lambda wildcards: get_par_chanlist( + setup, + f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", + tier, + basedir, + det_status, + chan_maps, + name="objects", + extension="pkl", + ), + output: + get_pattern_pars( + setup, + tier, + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + group: + f"merge-{tier}" + shell: + "{swenv} python3 -B " + "{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " + + set_last_rule_name(workflow, f"build_pars_{tier}_objects") + + if lh5_merge is True: + rule: + input: + lambda wildcards: get_par_chanlist( + setup, + f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", + tier, + basedir, + det_status, + chan_maps, + ), + params: + timestamp="{timestamp}", + datatype="cal", + output: + temp( + get_pattern_pars_tmp( + setup, + tier, + datatype="cal", + ) + ), + group: + f"merge-{tier}" + shell: + "{swenv} python3 -B " + 
"{basedir}/../scripts/merge_channels.py " + "--input {input} " + "--output {output} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " + + set_last_rule_name(workflow, f"build_pars_{tier}_db") + + rule: + input: + in_files=lambda wildcards: get_par_chanlist( + setup, + f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", + tier, + basedir, + det_status, + chan_maps, + extension="lh5" if lh5_merge is True else inspect.signature(get_par_chanlist).parameters['extension'].default, + ), + in_db=get_pattern_pars_tmp( + setup, + "dsp", + datatype="cal", + ) if lh5_merge is True else None, + plts=get_pattern_plts(setup, "dsp"), + objects=get_pattern_pars( + setup, + "dsp", + name="objects", + extension="dir", + check_in_cycle=check_in_cycle, + ), + params: + timestamp="{timestamp}", + datatype="cal", + output: + out_file=get_pattern_pars( + setup, + tier, + extension="lh5" if lh5_merge is True else inspect.signature(get_pattern_pars).parameters['extension'].default, + check_in_cycle=check_in_cycle, + ), + out_db=get_pattern_pars(setup, tier, check_in_cycle=check_in_cycle) if lh5_merge is True else None, + group: + f"merge-{tier}" + run: + shell_cmd = "{swenv} python3 -B " + shell_cmd += "{basedir}/../scripts/merge_channels.py " + shell_cmd += "--output {output.out_file} " + shell_cmd += "--input {input.in_files} " + shell_cmd += "--timestamp {params.timestamp} " + shell_cmd += "--channelmap {meta} " + if lh5_merge is True: + shell_cmd +="--in_db {input.in_db} " + shell_cmd +="--out_db {output.out_db} " + shell( + shell_cmd + ) + + set_last_rule_name(workflow, f"build_pars_{tier}") From 9be9e0983b0e0ff5843d9186c220332bf64294b5 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 17:49:59 +0100 Subject: [PATCH 081/101] update to script execs --- workflow/rules/ann.smk | 7 ++--- workflow/rules/blinding_calibration.smk | 10 +++---- workflow/rules/blinding_check.smk | 10 +++---- workflow/rules/chanlist_gen.smk | 2 +- workflow/rules/channel_merge.smk | 33 ++++++++++---------- workflow/rules/dsp.smk | 16 ++++------ workflow/rules/dsp_pars_geds.smk | 22 +++++--------- workflow/rules/evt.smk | 11 ++++--- workflow/rules/hit.smk | 25 ++++++---------- workflow/rules/pht.smk | 40 +++++++++---------------- workflow/rules/pht_fast.smk | 7 ++--- workflow/rules/psp.smk | 16 ++++------ workflow/rules/psp_pars_geds.smk | 13 ++++---- workflow/rules/qc_phy.smk | 13 ++++---- workflow/rules/raw.smk | 10 +++---- workflow/rules/skm.smk | 4 +-- workflow/rules/tcm.smk | 7 ++--- 17 files changed, 98 insertions(+), 148 deletions(-) diff --git a/workflow/rules/ann.smk b/workflow/rules/ann.smk index 7a50005..8e7429f 100644 --- a/workflow/rules/ann.smk +++ b/workflow/rules/ann.smk @@ -9,6 +9,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script rule build_ann: @@ -29,8 +30,7 @@ rule build_ann: runtime=300, mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_dsp.py " + f'{execenv_smk_py_script(config, "build_tier_dsp")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -61,8 +61,7 @@ rule build_pan: runtime=300, mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_dsp.py " + f'{execenv_smk_py_script(config, "build_tier_dsp")}' "--log {log} " "--configs {configs} " "--metadata {meta} " diff --git 
a/workflow/rules/blinding_calibration.smk b/workflow/rules/blinding_calibration.smk index 8407893..1a69313 100644 --- a/workflow/rules/blinding_calibration.smk +++ b/workflow/rules/blinding_calibration.smk @@ -12,6 +12,7 @@ from legenddataflow.patterns import ( get_pattern_log_channel, ) from pathlib import Path +from legenddataflow.execenv import execenv_smk_py_script rule build_blinding_calibration: @@ -37,8 +38,7 @@ rule build_blinding_calibration: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/blinding_calibration.py " + f'{execenv_smk_py_script(config, "par_geds_raw_blindcal")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -66,8 +66,7 @@ rule build_plts_blinding: group: "merge-blindcal" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " @@ -89,7 +88,6 @@ rule build_pars_blinding: group: "merge-blindcal" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input.infiles} " "--output {output} " diff --git a/workflow/rules/blinding_check.smk b/workflow/rules/blinding_check.smk index 916009f..bd9b796 100644 --- a/workflow/rules/blinding_check.smk +++ b/workflow/rules/blinding_check.smk @@ -12,6 +12,7 @@ from legenddataflow.patterns import ( get_pattern_plts, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script from pathlib import Path @@ -38,8 +39,7 @@ rule build_blinding_check: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/check_blinding.py " + f'{execenv_smk_py_script(config, "par_geds_raw_blindcheck")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -67,8 +67,7 @@ rule build_plts_raw: group: "merge-raw" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " @@ -92,5 +91,4 @@ rule build_pars_raw: group: "merge-raw" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' diff --git a/workflow/rules/chanlist_gen.smk b/workflow/rules/chanlist_gen.smk index 06200e3..b6a3ea8 100644 --- a/workflow/rules/chanlist_gen.smk +++ b/workflow/rules/chanlist_gen.smk @@ -23,7 +23,7 @@ def get_chanlist(setup, keypart, workflow, config, det_status, chan_maps): f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = "create_chankeylist" # execenv_smk_py_script(workflow, config, )[0] + cmd = execenv_smk_py_script(config, "create_chankeylist") cmd += f" --det_status {det_status} --channelmap {chan_maps} --timestamp {key.timestamp} " cmd += f"--datatype cal --output_file {output_file}" os.system(cmd) diff --git a/workflow/rules/channel_merge.smk b/workflow/rules/channel_merge.smk index f717eda..21ebf25 100644 --- a/workflow/rules/channel_merge.smk +++ b/workflow/rules/channel_merge.smk @@ -8,6 +8,7 @@ from scripts.util.patterns import ( ) from scripts.util.utils import set_last_rule_name import inspect +from legenddataflow.execenv import execenv_smk_py_script def build_merge_rules(tier,lh5_merge=False): rule: @@ -28,8 +29,7 @@ def build_merge_rules(tier,lh5_merge=False): group: f"merge-{tier}" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + 
f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--channelmap {meta} " @@ -59,8 +59,7 @@ def build_merge_rules(tier,lh5_merge=False): group: f"merge-{tier}" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -93,8 +92,7 @@ def build_merge_rules(tier,lh5_merge=False): group: f"merge-{tier}" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + execenv_smk_py_script(config, "merge_channels") "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -140,17 +138,18 @@ def build_merge_rules(tier,lh5_merge=False): group: f"merge-{tier}" run: - shell_cmd = "{swenv} python3 -B " - shell_cmd += "{basedir}/../scripts/merge_channels.py " - shell_cmd += "--output {output.out_file} " - shell_cmd += "--input {input.in_files} " - shell_cmd += "--timestamp {params.timestamp} " - shell_cmd += "--channelmap {meta} " - if lh5_merge is True: - shell_cmd +="--in_db {input.in_db} " - shell_cmd +="--out_db {output.out_db} " - shell( - shell_cmd + shell_string = ( + execenv_smk_py_script(config, "merge_channels") + "--output {output.out_file} " + "--input {input.in_files} " + "--timestamp {params.timestamp} " + "--channelmap {meta} " ) + if lh5_merge is True: + shell_string += ( + "--in_db {input.in_db} " + "--out_db {output.out_db} " + ) + shell(shell_string) set_last_rule_name(workflow, f"build_pars_{tier}") diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index 8340dc8..a386c86 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -15,6 +15,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script dsp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -47,8 +48,7 @@ rule build_plts_dsp: group: "merge-dsp" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--channelmap {meta} " @@ -80,8 +80,7 @@ rule build_pars_dsp_objects: group: "merge-dsp" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -112,8 +111,7 @@ rule build_pars_dsp_db: group: "merge-dsp" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -159,8 +157,7 @@ rule build_pars_dsp: group: "merge-dsp" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--output {output.out_file} " "--in_db {input.in_db} " "--out_db {output.out_db} " @@ -192,8 +189,7 @@ rule build_dsp: runtime=300, mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_dsp.py " + f'{execenv_smk_py_script(config, "build_tier_dsp")}' "--log {log} " "--tier dsp " f"--configs {ro(configs)} " diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index 9fe1391..52f5cf6 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -13,6 +13,7 @@ from legenddataflow.patterns import ( get_pattern_log, 
get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script dsp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -41,8 +42,7 @@ rule build_pars_dsp_tau_geds: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_tau_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_tau")}' "--configs {configs} " "--log {log} " "--datatype {params.datatype} " @@ -76,8 +76,7 @@ rule build_pars_evtsel_geds: runtime=300, mem_swap=70, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_evtsel_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_evtsel")}' "--configs {configs} " "--log {log} " "--datatype {params.datatype} " @@ -114,8 +113,7 @@ rule build_pars_dsp_nopt_geds: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_nopt_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_nopt")}' "--database {input.database} " "--configs {configs} " "--log {log} " @@ -154,8 +152,7 @@ rule build_pars_dsp_dplms_geds: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_dplms_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_dplms")}' "--fft_raw_filelist {input.fft_files} " "--peak_file {input.peak_file} " "--database {input.database} " @@ -193,8 +190,7 @@ rule build_pars_dsp_eopt_geds: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_eopt_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_eopt")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -225,8 +221,7 @@ rule build_svm_dsp_geds: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_build_svm_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_svm_build")}' "--log {log} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " @@ -246,8 +241,7 @@ rule build_pars_dsp_svm_geds: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_svm_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_svm")}' "--log {log} " "--input_file {input.dsp_pars} " "--output_file {output.dsp_pars} " diff --git a/workflow/rules/evt.smk b/workflow/rules/evt.smk index bb90ce8..1bcb2a4 100644 --- a/workflow/rules/evt.smk +++ b/workflow/rules/evt.smk @@ -9,6 +9,7 @@ from legenddataflow.patterns import ( get_pattern_pars, get_pattern_log_concat, ) +from legenddataflow.execenv import execenv_smk_py_script rule build_evt: @@ -43,8 +44,7 @@ rule build_evt: mem_swap=50, run: shell_string = ( - f"{swenv} python3 -B " - f"{basedir}/../scripts/build_evt.py " + f'{execenv_smk_py_script(config, "build_tier_evt")}' f"--configs {ro(configs)} " f"--metadata {ro(meta)} " "--log {log} " @@ -96,8 +96,7 @@ rule build_pet: mem_swap=50, run: shell_string = ( - f"{swenv} python3 -B " - f"{basedir}/../scripts/build_evt.py " + f'{execenv_smk_py_script(config, "build_tier_evt")}' f"--configs {ro(configs)} " f"--metadata {ro(meta)} " "--log {log} " @@ -139,14 +138,14 @@ for evt_tier in ("evt", "pet"): params: timestamp="all", datatype="{datatype}", - lh5concat_exe=config["paths"]["install"] + "/bin/lh5concat", ro_input=lambda _, input: utils.as_ro(config, input), log: get_pattern_log_concat(config, f"tier_{evt_tier}_concat", time), group: "tier-evt" shell: - "{swenv} {params.lh5concat_exe} --verbose --overwrite " + f'{execenv_smk_py_script(config, "lh5concat")}' + "--verbose --overwrite " "--output {output} " "-- {params.ro_input} &> {log}" diff --git 
a/workflow/rules/hit.smk b/workflow/rules/hit.smk index 9ae6f77..aaa1cf5 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -20,6 +20,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script hit_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -59,8 +60,7 @@ rule build_qc: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_hit_qc.py " + f'{execenv_smk_py_script(config, "par_geds_hit_qc")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -108,8 +108,7 @@ rule build_energy_calibration: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_hit_ecal.py " + f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -157,8 +156,7 @@ rule build_aoe_calibration: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_hit_aoe.py " + f'{execenv_smk_py_script(config, "par_geds_hit_aoe")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -204,8 +202,7 @@ rule build_lq_calibration: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_hit_lq.py " + f'{execenv_smk_py_script(config, "par_geds_hit_lq")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -247,8 +244,7 @@ rule build_pars_hit_objects: group: "merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {params.ro_input} " "--output {output} " "--channelmap {meta} " @@ -271,8 +267,7 @@ rule build_plts_hit: group: "merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {params.ro_input} " "--output {output} " "--channelmap {meta} " @@ -303,8 +298,7 @@ rule build_pars_hit: group: "merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {params.ro_input[infiles]} " "--output {output} " "--channelmap {meta} " @@ -331,8 +325,7 @@ rule build_hit: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_hit.py " + f'{execenv_smk_py_script(config, "build_tier_hit")}' f"--configs {ro(configs)} " "--metadata {meta} " "--log {log} " diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index dd1deb4..1b792c6 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -20,6 +20,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script pht_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -126,8 +127,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 30, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_qc.py " + f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -179,8 +179,7 @@ rule build_pht_qc: mem_swap=60, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_qc.py " + f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -241,8 +240,7 @@ rule build_per_energy_calibration: resources: runtime=300, shell: - "{swenv} python3 -B " - 
"{basedir}/../scripts/pars_hit_ecal.py " + f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -354,8 +352,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_partcal.py " + f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -415,8 +412,7 @@ rule build_pht_energy_super_calibrations: mem_swap=60, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_partcal.py " + f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -537,8 +533,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_aoecal.py " + f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -598,8 +593,7 @@ rule build_pht_aoe_calibrations: mem_swap=60, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_aoecal.py " + f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -718,8 +712,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_lqcal.py " + f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -774,8 +767,7 @@ rule build_pht_lq_calibration: mem_swap=60, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_lqcal.py " + f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -826,8 +818,7 @@ rule build_pars_pht_objects: group: "merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " @@ -847,8 +838,7 @@ rule build_plts_pht: group: "merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " @@ -876,8 +866,7 @@ rule build_pars_pht: group: "merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input.infiles} " "--output {output} " @@ -903,8 +892,7 @@ rule build_pht: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_hit.py " + f'{execenv_smk_py_script(config, "build_tier_hit")}' f"--configs {ro(configs)} " "--metadata {meta} " "--log {log} " diff --git a/workflow/rules/pht_fast.smk b/workflow/rules/pht_fast.smk index 75f7a47..2379753 100644 --- a/workflow/rules/pht_fast.smk +++ b/workflow/rules/pht_fast.smk @@ -11,6 +11,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script pht_fast_rules = {} @@ -105,8 +106,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 12, runtime=300, shell: - "{swenv} python3 -B " - f"{basedir}/../scripts/pars_pht_fast.py " + 
f'{execenv_smk_py_script(config, "par_geds_pht_fast")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -164,8 +164,7 @@ rule par_pht_fast: mem_swap=50, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_fast.py " + f'{execenv_smk_py_script(config, "par_geds_pht_fast")}' "--log {log} " "--configs {configs} " "--metadata {meta} " diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index 456d30a..990f186 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -14,6 +14,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -51,8 +52,7 @@ rule build_pars_psp_objects: group: "merge-psp" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--channelmap {meta} " @@ -73,8 +73,7 @@ rule build_plts_psp: group: "merge-psp" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--channelmap {meta} " @@ -101,8 +100,7 @@ rule build_pars_psp_db: group: "merge-psp" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--channelmap {meta} " @@ -144,8 +142,7 @@ rule build_pars_psp: group: "merge-psp" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--output {output.out_file} " "--in_db {input.in_db} " "--out_db {output.out_db} " @@ -176,8 +173,7 @@ rule build_psp: runtime=300, mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_dsp.py " + f'{execenv_smk_py_script(config, "build_tier_dsp")}' "--log {log} " "--tier psp " f"--configs {ro(configs)} " diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk index 6ac7e05..10d9ab1 100644 --- a/workflow/rules/psp_pars_geds.smk +++ b/workflow/rules/psp_pars_geds.smk @@ -14,6 +14,7 @@ from legenddataflow.patterns import ( get_pattern_pars, get_pattern_tier, ) +from legenddataflow.execenv import execenv_smk_py_script psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -95,8 +96,7 @@ for key, dataset in part.datasets.items(): resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/par_psp_geds.py " + f'{execenv_smk_py_script(config, "par_geds_psp_average")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -141,8 +141,7 @@ rule build_par_psp: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/par_psp.py " + f'{execenv_smk_py_script(config, "par_geds_psp_average")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -183,8 +182,7 @@ rule build_svm_psp: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_build_svm_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_svm_build")}' "--log {log} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " @@ -204,8 +202,7 @@ rule build_pars_psp_svm: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_dsp_svm_geds.py " + f'{execenv_smk_py_script(config, "par_geds_dsp_svm")}' "--log {log} " 
"--input_file {input.dsp_pars} " "--output_file {output.dsp_pars} " diff --git a/workflow/rules/qc_phy.smk b/workflow/rules/qc_phy.smk index 522eb45..a5cd954 100644 --- a/workflow/rules/qc_phy.smk +++ b/workflow/rules/qc_phy.smk @@ -11,6 +11,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) +from legenddataflow.execenv import execenv_smk_py_script intier = "psp" @@ -66,8 +67,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 20, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_qc_phy.py " + f'{execenv_smk_py_script(config, "par_geds_pht_qc_phy")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -108,8 +108,7 @@ rule build_pht_qc_phy: mem_swap=60, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_pht_qc_phy.py " + f'{execenv_smk_py_script(config, "par_geds_pht_qc_phy")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -147,8 +146,7 @@ rule build_plts_pht_phy: group: "merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " @@ -170,7 +168,6 @@ rule build_pars_pht_phy: group: "merge-hit" shell: - "{swenv} python3 -B " - "{basedir}/../scripts/merge_channels.py " + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input.infiles} " "--output {output} " diff --git a/workflow/rules/raw.smk b/workflow/rules/raw.smk index 311c14c..b0040fd 100644 --- a/workflow/rules/raw.smk +++ b/workflow/rules/raw.smk @@ -7,6 +7,7 @@ from legenddataflow.patterns import ( ) from legenddataflow.utils import set_last_rule_name from legenddataflow.create_pars_keylist import ParsKeyResolve +from legenddataflow.execenv import execenv_smk_py_script raw_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -39,8 +40,7 @@ rule build_raw_orca: mem_swap=110, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}" + f"/../scripts/build_raw_orca.py " + f'{execenv_smk_py_script(config, "build_tier_raw_orca")}' "--log {log} " f"--configs {ro(configs)} " f"--chan_maps {ro(chan_maps)} " @@ -69,8 +69,7 @@ rule build_raw_fcio: mem_swap=110, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}" + f"/../scripts/build_raw_fcio.py " + f'{execenv_smk_py_script(config, "build_tier_raw_fcio")}' "--log {log} " f"--configs {ro(configs)} " f"--chan_maps {ro(chan_maps)} " @@ -105,8 +104,7 @@ rule build_raw_blind: mem_swap=110, runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_raw_blind.py " + f'{execenv_smk_py_script(config, "build_tier_raw_blind")}' "--log {log} " f"--configs {ro(configs)} " f"--chan_maps {ro(chan_maps)} " diff --git a/workflow/rules/skm.smk b/workflow/rules/skm.smk index 91a8755..7a4a686 100644 --- a/workflow/rules/skm.smk +++ b/workflow/rules/skm.smk @@ -8,6 +8,7 @@ from legenddataflow.patterns import ( get_pattern_pars, get_pattern_log_concat, ) +from legenddataflow.execenv import execenv_smk_py_script rule build_skm: @@ -26,8 +27,7 @@ rule build_skm: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_skm.py " + f'{execenv_smk_py_script(config, "build_tier_skm")}' f"--configs {ro(configs)} " "--timestamp {params.timestamp} " "--log {log} " diff --git a/workflow/rules/tcm.smk b/workflow/rules/tcm.smk index 9d80d1b..afb080c 100644 --- a/workflow/rules/tcm.smk +++ b/workflow/rules/tcm.smk @@ -8,6 +8,7 @@ from legenddataflow.patterns 
import ( get_pattern_pars_tmp_channel, get_pattern_log_channel, ) +from legenddataflow.execenv import execenv_smk_py_script # This rule builds the tcm files each raw file @@ -28,8 +29,7 @@ rule build_tier_tcm: runtime=300, mem_swap=20, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/build_tcm.py " + f'{execenv_smk_py_script(config, "build_tier_tcm")}' "--log {log} " f"--configs {ro(configs)} " "--datatype {params.datatype} " @@ -57,8 +57,7 @@ rule build_pulser_ids: resources: runtime=300, shell: - "{swenv} python3 -B " - "{basedir}/../scripts/pars_tcm_pulser.py " + f'{execenv_smk_py_script(config, "par_geds_tcm_pulser")}' "--log {log} " f"--configs {ro(configs)} " "--datatype {params.datatype} " From 16e511be9a53f3be4c84b6425ad40bdf32661263 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 17:51:43 +0100 Subject: [PATCH 082/101] some renames --- .../scripts/par/geds/dsp/dplms.py | 4 +--- .../{blinding_calibration.py => blindcal.py} | 16 +++++----------- .../raw/{check_blinding.py => blindcheck.py} | 17 ++++++++--------- .../geds/tcm/{pars_tcm_pulser.py => pulser.py} | 0 4 files changed, 14 insertions(+), 23 deletions(-) rename workflow/src/legenddataflow/scripts/par/geds/raw/{blinding_calibration.py => blindcal.py} (88%) rename workflow/src/legenddataflow/scripts/par/geds/raw/{check_blinding.py => blindcheck.py} (90%) rename workflow/src/legenddataflow/scripts/par/geds/tcm/{pars_tcm_pulser.py => pulser.py} (100%) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py index 1bea45d..2b0004b 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py @@ -48,9 +48,7 @@ def par_geds_dsp_dplms() -> None: channel_dict = meta.channelmap(args.timestamp, system=args.datatype) channel = f"ch{channel_dict[args.channel].daq.rawid:07}" - configs = LegendMetadata(args.configs, lazy=True).on( - args.timestamp, system=args.datatype - ) + configs = TextDB(args.configs).on(args.timestamp, system=args.datatype) dsp_config = config_dict["inputs"]["proc_chain"][args.channel] dplms_json = config_dict["inputs"]["dplms_pars"][args.channel] diff --git a/workflow/src/legenddataflow/scripts/par/geds/raw/blinding_calibration.py b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py similarity index 88% rename from workflow/src/legenddataflow/scripts/par/geds/raw/blinding_calibration.py rename to workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py index 8af27a2..a937458 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/raw/blinding_calibration.py +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py @@ -13,6 +13,7 @@ import matplotlib.pyplot as plt import numpy as np from dbetto.catalog import Props +from legendmeta import LegendMetadata from lgdo import lh5 from pygama.pargen.energy_cal import HPGeCalibration @@ -44,25 +45,18 @@ def par_geds_raw_blindcal() -> None: logging.getLogger("matplotlib").setLevel(logging.INFO) log = logging.getLogger(__name__) - # load in channel map - # meta = LegendMetadata(args.meta, lazy=True) - - # chmap = meta.channelmap(args.timestamp) - # if chmap.map("daq.rawid")[int(args.channel[2:])]["analysis"]["is_blinded"] is True: + meta = LegendMetadata(path=args.meta) + channel_dict = meta.channelmap(args.timestamp, system=args.datatype) + channel = f"ch{channel_dict[args.channel].daq.rawid:07}" # peaks to search for peaks_keV = np.array( [238, 583.191, 727.330, 860.564, 1592.53, 
1620.50, 2103.53, 2614.50] ) - E_uncal = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[0].view_as( - "np" - ) + E_uncal = lh5.read(f"{channel}/raw/daqenergy", sorted(args.files))[0].view_as("np") E_uncal = E_uncal[E_uncal > 200] guess_keV = 2620 / np.nanpercentile(E_uncal, 99) # usual simple guess - # Euc_min = peaks_keV[0] / guess_keV * 0.6 - # Euc_max = peaks_keV[-1] / guess_keV * 1.1 - # dEuc = 1 / guess_keV # daqenergy is an int so use integer binning (dx used to be bugged as output so switched to nbins) diff --git a/workflow/src/legenddataflow/scripts/par/geds/raw/check_blinding.py b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py similarity index 90% rename from workflow/src/legenddataflow/scripts/par/geds/raw/check_blinding.py rename to workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py index 4a8f53c..7f645c1 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/raw/check_blinding.py +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py @@ -48,20 +48,19 @@ def par_geds_raw_blindcheck() -> None: log = build_log(config_dict, args.log) # get the usability status for this channel - chmap = ( - LegendMetadata(args.metadata, lazy=True) - .channelmap(args.timestamp) - .map("daq.rawid") + chmap = LegendMetadata(args.metadata, lazy=True).channelmap( + args.timestamp, system=args.datatype ) - det_status = chmap[int(args.channel[2:])]["analysis"]["is_blinded"] + channel = f"ch{chmap[args.channel].daq.rawid:07}" + det_status = chmap[args.channel]["analysis"]["is_blinded"] # read in calibration curve for this channel - blind_curve = Props.read_from(args.blind_curve)[args.channel]["pars"]["operations"] + blind_curve = Props.read_from(args.blind_curve)[channel]["pars"]["operations"] # load in the data - daqenergy = lh5.read(f"{args.channel}/raw/daqenergy", sorted(args.files))[ - 0 - ].view_as("np") + daqenergy = lh5.read(f"{channel}/raw/daqenergy", sorted(args.files))[0].view_as( + "np" + ) # calibrate daq energy using pre existing curve daqenergy_cal = ne.evaluate( diff --git a/workflow/src/legenddataflow/scripts/par/geds/tcm/pars_tcm_pulser.py b/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py similarity index 100% rename from workflow/src/legenddataflow/scripts/par/geds/tcm/pars_tcm_pulser.py rename to workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py From 0eab4481033be15ab05d3e5a5071028b10ac489b Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 17:52:07 +0100 Subject: [PATCH 083/101] add exec scripts --- pyproject.toml | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 807e71b..86f7d5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,14 +84,43 @@ docs = [ [project.scripts] dataprod = "legenddataflow.execenv:dataprod" +create_chankeylist = "legenddataflow.scripts.create_chankeylist:create_chankeylist" +merge_channels = "legenddataflow.scripts.merge_channels:merge_channels" +build_filedb = "legenddataflow.scripts.build_filedb:build_filedb" +build_tier_dsp = "legenddataflow.scripts.tier.dsp:build_tier_dsp" +build_tier_evt = "legenddataflow.scripts.tier.evt:build_tier_evt" +build_tier_hit = "legenddataflow.scripts.tier.hit:build_tier_hit" +build_tier_raw_blind = "legenddataflow.scripts.tier.raw_blind:build_tier_raw_blind" +build_tier_raw_fcio = "legenddataflow.scripts.tier.raw_fcio:build_tier_raw_fcio" +build_tier_raw_orca = "legenddataflow.scripts.tier.raw_orca:build_tier_raw_orca" +build_tier_skm = 
"legenddataflow.scripts.tier.skm:build_tier_skm" +build_tier_tcm = "legenddataflow.scripts.tier.tcm:build_tier_tcm" +par_geds_dsp_dplms = "legenddataflow.scripts.par.geds.dsp.dplms:par_geds_dsp_dplms" +par_geds_dsp_eopt = "legenddataflow.scripts.par.geds.dsp.eopt:par_geds_dsp_eopt" +par_geds_dsp_evtsel = "legenddataflow.scripts.par.geds.dsp.evtsel:par_geds_dsp_evtsel" +par_geds_dsp_nopt = "legenddataflow.scripts.par.geds.dsp.nopt:par_geds_dsp_nopt" +par_geds_dsp_svm_build = "legenddataflow.scripts.par.geds.dsp.svm_build:par_geds_dsp_svm_build" +par_geds_dsp_svm = "legenddataflow.scripts.par.geds.dsp.svm:par_geds_dsp_svm" +par_geds_dsp_tau = "legenddataflow.scripts.par.geds.dsp.tau:par_geds_dsp_tau" +par_geds_hit_aoe = "legenddataflow.scripts.par.geds.hit.aoe:par_geds_hit_aoe" +par_geds_hit_ecal = "legenddataflow.scripts.par.geds.hit.ecal:par_geds_hit_ecal" +par_geds_hit_lq = "legenddataflow.scripts.par.geds.hit.lq:par_geds_hit_lq" +par_geds_hit_qc = "legenddataflow.scripts.par.geds.hit.qc:par_geds_hit_qc" +par_geds_pht_aoe = "legenddataflow.scripts.par.geds.pht.aoe:par_geds_pht_aoe" +par_geds_pht_ecal_part = "legenddataflow.scripts.par.geds.pht.ecal_part:par_geds_pht_ecal_part" +par_geds_pht_fast = "legenddataflow.scripts.par.geds.pht.fast:par_geds_pht_fast" +par_geds_pht_qc_phy = "legenddataflow.scripts.par.geds.pht.qc_phy:par_geds_pht_qc_phy" +par_geds_pht_qc = "legenddataflow.scripts.par.geds.pht.qc:par_geds_pht_qc" +par_geds_psp_average = "legenddataflow.scripts.par.geds.psp.average:par_geds_psp_average" +par_geds_raw_blindcal = "legenddataflow.scripts.par.geds.raw.blindcal:par_geds_raw_blindcal" +par_geds_raw_blindcheck = "legenddataflow.scripts.par.geds.raw.blindcheck:par_geds_raw_blindcheck" +par_geds_tcm_pulser = "legenddataflow.scripts.par.geds.raw.tcm.pulser:par_geds_raw_pulser" [tool.uv.workspace] exclude = ["generated", "inputs", "software", "workflow"] [tool.uv] -dev-dependencies = [ - "legend-dataflow[test]", -] +default-groups = [] [tool.pytest.ini_options] minversion = "6.0" From 68ac1804b2d75043403ce770cce0b077e643610a Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 17:53:09 +0100 Subject: [PATCH 084/101] more bugfixes --- workflow/src/legenddataflow/FileKey.py | 10 ++++++---- workflow/src/legenddataflow/cal_grouping.py | 2 +- workflow/src/legenddataflow/create_pars_keylist.py | 14 +++++--------- workflow/src/legenddataflow/execenv.py | 13 ++++++------- 4 files changed, 18 insertions(+), 21 deletions(-) diff --git a/workflow/src/legenddataflow/FileKey.py b/workflow/src/legenddataflow/FileKey.py index 1e33b96..7870e46 100644 --- a/workflow/src/legenddataflow/FileKey.py +++ b/workflow/src/legenddataflow/FileKey.py @@ -73,7 +73,7 @@ def get_filekey_from_filename(cls, filename): def get_filekey_from_pattern(cls, filename, pattern=None): if isinstance(pattern, Path): pattern = pattern.as_posix() - + filename = str(filename) key_pattern_rx = re.compile( regex_from_filepattern(cls.key_pattern if pattern is None else pattern) ) @@ -108,9 +108,11 @@ def parse_keypart(cls, keypart): return cls(**d) def expand(self, file_pattern, **kwargs): - if isinstance(file_pattern, Path): - file_pattern = file_pattern.as_posix() - wildcard_dict = dict(**self._asdict(), **kwargs) + file_pattern = str(file_pattern) + wildcard_dict = self._asdict() + if kwargs is not None: + for key, value in kwargs.items(): + wildcard_dict[key] = value wildcard_dict = { wildcard: [wildcard_value] if isinstance(wildcard_value, str) diff --git a/workflow/src/legenddataflow/cal_grouping.py 
b/workflow/src/legenddataflow/cal_grouping.py index 13145d7..646791a 100644 --- a/workflow/src/legenddataflow/cal_grouping.py +++ b/workflow/src/legenddataflow/cal_grouping.py @@ -194,7 +194,7 @@ def get_log_file( get_pattern_log_channel(self.setup, name, processing_timestamp) )[0] else: - return "/tmp/log.log" + return "log.log" def get_timestamp( self, catalog, dataset, channel, tier, experiment="l200", datatype="cal" diff --git a/workflow/src/legenddataflow/create_pars_keylist.py b/workflow/src/legenddataflow/create_pars_keylist.py index 9325a6d..5f51828 100644 --- a/workflow/src/legenddataflow/create_pars_keylist.py +++ b/workflow/src/legenddataflow/create_pars_keylist.py @@ -7,10 +7,9 @@ import warnings from pathlib import Path -import snakemake as smk import yaml -from .FileKey import FileKey, ProcessingFileKey +from .FileKey import FileKey, ProcessingFileKey, regex_from_filepattern from .patterns import par_validity_pattern @@ -107,13 +106,10 @@ def get_keys(keypart, search_pattern): wildcard_dict = dict(ext="*", **d._asdict()) else: wildcard_dict = d._asdict() - try: - tier_pattern_rx = re.compile( - smk.io.regex_from_filepattern(str(search_pattern)) - ) - except AttributeError: - tier_pattern_rx = re.compile(smk.io.regex(str(search_pattern))) - fn_glob_pattern = smk.io.expand(search_pattern, **wildcard_dict)[0] + + tier_pattern_rx = re.compile(regex_from_filepattern(str(search_pattern))) + key = FileKey.get_filekey_from_pattern(search_pattern, search_pattern) + fn_glob_pattern = key.get_path_from_filekey(search_pattern, **wildcard_dict)[0] p = Path(fn_glob_pattern) parts = p.parts[p.is_absolute() :] files = Path(p.root).glob(str(Path(*parts))) diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index 6a0239d..9fd2ac0 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -60,19 +60,19 @@ def execenv_python(config, aslist=False): return " ".join(cmdline), cmdenv -def execenv_smk_py_script(workflow, config, scriptname, aslist=False): +def execenv_smk_py_script(config, scriptname, aslist=False): """Returns the command used to run a Python script for a Snakemake rule. 
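The create_pars_keylist change above drops the snakemake.io dependency in favour of the regex_from_filepattern helper imported from FileKey. That helper is not shown in this hunk; the standalone sketch below only illustrates the idea of turning a {wildcard} file pattern into a regex with named groups and is not the project's implementation:

import re

def pattern_to_regex(pattern):
    # escape the literal parts, turn every {name} into a named capture group
    out, pos = [], 0
    for m in re.finditer(r"\{(\w+)\}", pattern):
        out.append(re.escape(pattern[pos:m.start()]))
        out.append(f"(?P<{m.group(1)}>[^/]+)")
        pos = m.end()
    out.append(re.escape(pattern[pos:]))
    return "".join(out) + "$"

m = re.match(
    pattern_to_regex("l200-{period}-{run}-cal-{timestamp}-par_dsp.yaml"),
    "l200-p03-r001-cal-20230101T123456Z-par_dsp.yaml",
)
# m.groupdict() == {"period": "p03", "run": "r001", "timestamp": "20230101T123456Z"}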
For example: `apptainer run image.sif python path/to/script.py` """ config = AttrsDict(config) - cmdline, cmdenv = execenv_python(config, aslist=True) - cmdline.append(f"{workflow.basedir}/scripts/{scriptname}") + cmdline, _ = execenv_prefix(config, aslist=True) + cmdline.append(f"{config.paths.install}/bin/{scriptname} ") if aslist: - return cmdline, cmdenv - return " ".join(cmdline), cmdenv + return cmdline + return " ".join(cmdline) def dataprod() -> None: @@ -240,9 +240,8 @@ def _runcmd(cmd_expr, cmd_env, **kwargs): "pip", "--no-cache", "install", - str(config_loc), + str(config_loc), # +"[dataprod]" ] - if args.editable: cmd_expr.insert(-1, "--editable") From e3fe518d0e64ec40b46bfbbc7a34feabbc746990 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 17:53:32 +0100 Subject: [PATCH 085/101] config to attrsdict --- workflow/Snakefile | 5 +++-- workflow/Snakefile-build-raw | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 50be710..7bc5c65 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -18,24 +18,25 @@ from datetime import datetime from collections import OrderedDict import logging +from dbetto import AttrsDict from legendmeta import LegendMetadata from legenddataflow import CalGrouping from legenddataflow import utils utils.subst_vars_in_snakemake_config(workflow, config) +config = AttrsDict(config) check_in_cycle = True configs = utils.config_path(config) chan_maps = utils.chan_map_path(config) meta = utils.metadata_path(config) det_status = utils.det_status_path(config) -swenv = utils.runcmd(config) basedir = workflow.basedir time = datetime.now().strftime("%Y%m%dT%H%M%SZ") if not Path(meta).exists(): - LegendMetadata().checkout(config["legend_metadata_version"]) + LegendMetadata(meta).checkout(config.legend_metadata_version) part = CalGrouping(config, Path(det_status) / "cal_groupings.yaml") diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw index da0d58d..6346978 100644 --- a/workflow/Snakefile-build-raw +++ b/workflow/Snakefile-build-raw @@ -11,8 +11,10 @@ from pathlib import Path from legenddataflow import patterns as patt from legenddataflow import utils, execenv, ParsKeyResolve from datetime import datetime +from dbetto import AttrsDict utils.subst_vars_in_snakemake_config(workflow, config) +config = AttrsDict(config) check_in_cycle = True swenv = execenv.execenv_prefix(config) @@ -22,7 +24,7 @@ det_status = utils.det_status_path(config) time = datetime.now().strftime("%Y%m%dT%H%M%SZ") if not Path(meta_path).exists(): - LegendMetadata(meta_path).checkout(config["legend_metadata_version"]) + LegendMetadata(meta_path).checkout(config.legend_metadata_version) wildcard_constraints: From d2a881aacef9b24e0c85070901a62d3909352e9d Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 17:54:01 +0100 Subject: [PATCH 086/101] fix tests maybe --- tests/dummy_cycle/config.json | 40 ------------- tests/dummy_cycle/config.yaml | 58 +++++++++++++++++++ .../generated/par/dsp/validity.jsonl | 3 - .../dataprod/overrides/dsp/validity.jsonl | 1 - tests/test_util.py | 8 +-- 5 files changed, 62 insertions(+), 48 deletions(-) delete mode 100644 tests/dummy_cycle/config.json create mode 100644 tests/dummy_cycle/config.yaml delete mode 100644 tests/dummy_cycle/generated/par/dsp/validity.jsonl delete mode 100644 tests/dummy_cycle/inputs/dataprod/overrides/dsp/validity.jsonl diff --git a/tests/dummy_cycle/config.json b/tests/dummy_cycle/config.json deleted file mode 100644 index 
e9a358d..0000000 --- a/tests/dummy_cycle/config.json +++ /dev/null @@ -1,40 +0,0 @@ -{ - "setups": { - "test": { - "paths": { - "sandbox_path": "", - "tier_daq": "$_/input_data/tier/daq", - - "dataflow": "$_/dataflow", - - "metadata": "$_/inputs", - "config": "$_/inputs/dataprod/config", - "par_overwrite": "$_/inputs/dataprod/overrides", - "chan_map": "$_/inputs/hardware/configuration", - "detector_db": "$_/inputs/hardware/detectors", - - "tier": "$_/generated/tier", - "tier_raw": "$_/generated/tier/raw", - "tier_tcm": "$_/generated/tier/tcm", - "tier_dsp": "$_/generated/tier/dsp", - "tier_hit": "$_/generated/tier/hit", - "tier_evt": "$_/generated/tier/evt", - - "par": "$_/generated/par", - "par_raw": "$_/generated/par/raw", - "par_tcm": "$_/generated/par/tcm", - "par_dsp": "$_/generated/par/dsp", - "par_hit": "$_/generated/par/hit", - "par_evt": "$_/generated/par/evt", - - "plt": "$_/generated/plt", - "log": "$_/generated/log", - - "tmp_plt": "$_/generated/tmp/plt", - "tmp_log": "$_/generated/tmp/log", - "tmp_filelists": "$_/generated/tmp/filelists", - "tmp_par": "$_/generated/tmp/par" - } - } - } -} diff --git a/tests/dummy_cycle/config.yaml b/tests/dummy_cycle/config.yaml new file mode 100644 index 0000000..a40938d --- /dev/null +++ b/tests/dummy_cycle/config.yaml @@ -0,0 +1,58 @@ +paths: + sandbox_path: "" + tier_daq: $_/generated/tier/daq + tier_raw_blind: "" + + workflow: $_/workflow + + metadata: $_/inputs + config: $_/inputs/dataprod/config + par_overwrite: $_/inputs/dataprod/overrides + chan_map: $_/inputs/hardware/configuration + detector_status: $_/inputs/datasets + detector_db: $_/inputs/hardware/detectors + + tier: $_/generated/tier + tier_raw: /data2/public/prodenv/prod-blind/ref-raw/generated/tier/raw + tier_tcm: $_/generated/tier/tcm + tier_dsp: $_/generated/tier/dsp + tier_hit: $_/generated/tier/hit + tier_ann: $_/generated/tier/ann + tier_evt: $_/generated/tier/evt + tier_psp: $_/generated/tier/psp + tier_pht: $_/generated/tier/pht + tier_pan: $_/generated/tier/pan + tier_pet: $_/generated/tier/pet + tier_skm: $_/generated/tier/skm + + par: $_/generated/par + par_raw: $_/generated/par/raw + par_tcm: $_/generated/par/tcm + par_dsp: $_/generated/par/dsp + par_hit: $_/generated/par/hit + par_evt: $_/generated/par/evt + par_psp: $_/generated/par/psp + par_pht: $_/generated/par/pht + par_pet: $_/generated/par/pet + + plt: $_/generated/plt + log: $_/generated/log + + tmp_plt: $_/generated/tmp/plt + tmp_log: $_/generated/tmp/log + tmp_filelists: $_/generated/tmp/filelists + tmp_par: $_/generated/tmp/par + + src: $_/software/python/src + install: $_/.snakemake/legend-dataflow/venv + +table_format: + raw: ch{ch:07d}/raw + dsp: ch{ch:07d}/dsp + psp: ch{ch:07d}/dsp + hit: ch{ch:07d}/hit + pht: ch{ch:07d}/hit + evt: "{grp}/evt" + pet: "{grp}/evt" + skm: "{grp}/skm" + tcm: hardware_tcm_1 diff --git a/tests/dummy_cycle/generated/par/dsp/validity.jsonl b/tests/dummy_cycle/generated/par/dsp/validity.jsonl deleted file mode 100644 index c730b86..0000000 --- a/tests/dummy_cycle/generated/par/dsp/validity.jsonl +++ /dev/null @@ -1,3 +0,0 @@ -{"valid_from": "20230101T123456Z", "category": "all", "apply": ["cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"]} -{"valid_from": "20230110T123456Z", "category": "all", "apply": ["lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.json", "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json"]} -{"valid_from": "20230202T004321Z", "category": "all", "apply": 
["cal/p00/r001/l200-p00-r001-cal-20230202T004321Z-par_dsp.json","lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.json"]} diff --git a/tests/dummy_cycle/inputs/dataprod/overrides/dsp/validity.jsonl b/tests/dummy_cycle/inputs/dataprod/overrides/dsp/validity.jsonl deleted file mode 100644 index 4a13449..0000000 --- a/tests/dummy_cycle/inputs/dataprod/overrides/dsp/validity.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"valid_from": "20230101T123456Z", "category": "all", "apply": ["cal/p00/r000/l200-p00-r000-cal-T%-par_dsp_energy-overwrite.json"]} diff --git a/tests/test_util.py b/tests/test_util.py index 38d8910..01f5ffb 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -11,7 +11,7 @@ testprod = Path(__file__).parent / "dummy_cycle" -with (testprod / "config.json").open() as r: +with (testprod / "config.yaml").open() as r: setup = json.load(r) subst_vars(setup, var_values={"_": str(testprod)}) setup = setup["setups"]["test"] @@ -42,7 +42,7 @@ def test_filekey(): assert ( FileKey.get_filekey_from_pattern( key.get_path_from_filekey(patterns.get_pattern_tier(setup, "dsp"))[0], - utils.get_pattern_tier(setup, "dsp"), + utils.get_tier_path(setup, "dsp"), ).name == key.name ) @@ -98,6 +98,6 @@ def test_create_pars_keylist(): pkeylist, {"cal": ["par_dsp"], "lar": ["par_dsp"]} )[1].apply ) == { - "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.json", - "lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.json", + "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.yaml", + "lar/p00/r000/l200-p00-r000-lar-20230110T123456Z-par_dsp.yaml", } From 14e523ed10db08ff3f93981ec0d64f1e7f37cc88 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 18:06:22 +0100 Subject: [PATCH 087/101] use channel merge func --- workflow/rules/channel_merge.smk | 37 +++++---- workflow/rules/dsp.smk | 134 +------------------------------ workflow/rules/dsp_pars_geds.smk | 4 +- workflow/rules/hit.smk | 83 +------------------ workflow/rules/pht.smk | 74 +---------------- workflow/rules/psp.smk | 119 +-------------------------- 6 files changed, 29 insertions(+), 422 deletions(-) diff --git a/workflow/rules/channel_merge.smk b/workflow/rules/channel_merge.smk index 21ebf25..ef2b57e 100644 --- a/workflow/rules/channel_merge.smk +++ b/workflow/rules/channel_merge.smk @@ -1,4 +1,4 @@ -from scripts.util.patterns import ( +from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, get_pattern_plts, @@ -6,7 +6,7 @@ from scripts.util.patterns import ( get_pattern_pars_tmp, get_pattern_pars, ) -from scripts.util.utils import set_last_rule_name +from legenddataflow.utils import set_last_rule_name import inspect from legenddataflow.execenv import execenv_smk_py_script @@ -14,7 +14,7 @@ def build_merge_rules(tier,lh5_merge=False): rule: input: lambda wildcards: get_plt_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", tier, basedir, @@ -25,7 +25,7 @@ def build_merge_rules(tier,lh5_merge=False): timestamp="{timestamp}", datatype="cal", output: - get_pattern_plts(setup, tier), + get_pattern_plts(config, tier), group: f"merge-{tier}" shell: @@ -39,7 +39,7 @@ def build_merge_rules(tier,lh5_merge=False): rule: input: lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", tier, basedir, @@ -48,9 +48,12 @@ def build_merge_rules(tier,lh5_merge=False): name="objects", extension="pkl", ), + params: + 
timestamp="{timestamp}", + datatype="cal", output: get_pattern_pars( - setup, + config, tier, name="objects", extension="dir", @@ -71,7 +74,7 @@ def build_merge_rules(tier,lh5_merge=False): rule: input: lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", tier, basedir, @@ -84,7 +87,7 @@ def build_merge_rules(tier,lh5_merge=False): output: temp( get_pattern_pars_tmp( - setup, + config, tier, datatype="cal", ) @@ -92,7 +95,7 @@ def build_merge_rules(tier,lh5_merge=False): group: f"merge-{tier}" shell: - execenv_smk_py_script(config, "merge_channels") + f'{execenv_smk_py_script(config, "merge_channels")}' "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -103,7 +106,7 @@ def build_merge_rules(tier,lh5_merge=False): rule: input: in_files=lambda wildcards: get_par_chanlist( - setup, + config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", tier, basedir, @@ -112,13 +115,13 @@ def build_merge_rules(tier,lh5_merge=False): extension="lh5" if lh5_merge is True else inspect.signature(get_par_chanlist).parameters['extension'].default, ), in_db=get_pattern_pars_tmp( - setup, + config, "dsp", datatype="cal", - ) if lh5_merge is True else None, - plts=get_pattern_plts(setup, "dsp"), + ) if lh5_merge is True else [], + plts=get_pattern_plts(config, "dsp"), objects=get_pattern_pars( - setup, + config, "dsp", name="objects", extension="dir", @@ -129,17 +132,17 @@ def build_merge_rules(tier,lh5_merge=False): datatype="cal", output: out_file=get_pattern_pars( - setup, + config, tier, extension="lh5" if lh5_merge is True else inspect.signature(get_pattern_pars).parameters['extension'].default, check_in_cycle=check_in_cycle, ), - out_db=get_pattern_pars(setup, tier, check_in_cycle=check_in_cycle) if lh5_merge is True else None, + out_db=get_pattern_pars(config, tier, check_in_cycle=check_in_cycle) if lh5_merge is True else [], group: f"merge-{tier}" run: shell_string = ( - execenv_smk_py_script(config, "merge_channels") + f'{execenv_smk_py_script(config, "merge_channels")}' "--output {output.out_file} " "--input {input.in_files} " "--timestamp {params.timestamp} " diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index a386c86..4683a7c 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -30,140 +30,10 @@ Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) -rule build_plts_dsp: - input: - lambda wildcards: get_plt_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - ), - params: - timestamp="{timestamp}", - datatype="cal", - output: - get_pattern_plts(config, "dsp"), - group: - "merge-dsp" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input} " - "--output {output} " - "--channelmap {meta} " +include: "channel_merge.smk" -rule build_pars_dsp_objects: - input: - lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - name="objects", - extension="pkl", - ), - params: - timestamp="{timestamp}", - datatype="cal", - output: - get_pattern_pars( - config, - "dsp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - group: - 
"merge-dsp" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input} " - "--output {output} " - "--timestamp {params.timestamp} " - "--channelmap {meta} " - - -rule build_pars_dsp_db: - input: - lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - ), - params: - timestamp="{timestamp}", - datatype="cal", - output: - temp( - get_pattern_pars_tmp( - config, - "dsp", - datatype="cal", - ) - ), - group: - "merge-dsp" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input} " - "--output {output} " - "--timestamp {params.timestamp} " - "--channelmap {meta} " - - -rule build_pars_dsp: - input: - in_files=lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - name="dplms", - extension="lh5", - ), - in_db=get_pattern_pars_tmp( - config, - "dsp", - datatype="cal", - ), - plts=get_pattern_plts(config, "dsp"), - objects=get_pattern_pars( - config, - "dsp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - params: - timestamp="{timestamp}", - datatype="cal", - output: - out_file=get_pattern_pars( - config, - "dsp", - extension="lh5", - check_in_cycle=check_in_cycle, - ), - out_db=get_pattern_pars(config, "dsp", check_in_cycle=check_in_cycle), - group: - "merge-dsp" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--output {output.out_file} " - "--in_db {input.in_db} " - "--out_db {output.out_db} " - "--input {input.in_files} " - "--timestamp {params.timestamp} " - "--channelmap {meta} " +build_merge_rules("dsp", lh5_merge=True) rule build_dsp: diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index 52f5cf6..98a5a55 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -141,9 +141,7 @@ rule build_pars_dsp_dplms_geds: channel="{channel}", output: dsp_pars=temp(get_pattern_pars_tmp_channel(config, "dsp", "dplms")), - lh5_path=temp( - get_pattern_pars_tmp_channel(config, "dsp", "dplms", extension="lh5") - ), + lh5_path=temp(get_pattern_pars_tmp_channel(config, "dsp", extension="lh5")), plots=temp(get_pattern_plts_tmp_channel(config, "dsp", "dplms")), log: get_pattern_log_channel(config, "pars_dsp_dplms", time), diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index aaa1cf5..0a2c6f6 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -219,89 +219,10 @@ rule build_lq_calibration: "{input.files}" -rule build_pars_hit_objects: - input: - lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "hit", - basedir, - det_status, - chan_maps, - name="objects", - extension="pkl", - ), - output: - get_pattern_pars( - config, - "hit", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - params: - ro_input=lambda _, input: ro(input), - group: - "merge-hit" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {params.ro_input} " - "--output {output} " - "--channelmap {meta} " +include: "channel_merge.smk" -rule build_plts_hit: - input: - lambda wildcards: get_plt_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "hit", - basedir, - 
det_status, - chan_maps, - ), - output: - get_pattern_plts(config, "hit"), - params: - ro_input=lambda _, input: ro(input), - group: - "merge-hit" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {params.ro_input} " - "--output {output} " - "--channelmap {meta} " - - -rule build_pars_hit: - input: - infiles=lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "hit", - basedir, - det_status, - chan_maps, - ), - plts=get_pattern_plts(config, "hit"), - objects=get_pattern_pars( - config, - "hit", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - params: - ro_input=lambda _, input: {k: ro(v) for k, v in input.items()}, - output: - get_pattern_pars(config, "hit", check_in_cycle=check_in_cycle), - group: - "merge-hit" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {params.ro_input[infiles]} " - "--output {output} " - "--channelmap {meta} " +build_merge_rules("hit", lh5_merge=False) rule build_hit: diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index 1b792c6..bab3de7 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -795,80 +795,10 @@ rule_order_list.append(fallback_pht_rule.name) workflow._ruleorder.add(*rule_order_list) # [::-1] -rule build_pars_pht_objects: - input: - lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "pht", - basedir, - det_status, - chan_maps, - name="objects", - extension="pkl", - ), - output: - get_pattern_pars( - config, - "pht", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - group: - "merge-hit" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input} " - "--output {output} " +include: "channel_merge.smk" -rule build_plts_pht: - input: - lambda wildcards: get_plt_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "pht", - basedir, - det_status, - chan_maps, - ), - output: - get_pattern_plts(config, "pht"), - group: - "merge-hit" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input} " - "--output {output} " - - -rule build_pars_pht: - input: - infiles=lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "pht", - basedir, - det_status, - chan_maps, - ), - plts=get_pattern_plts(config, "pht"), - objects=get_pattern_pars( - config, - "pht", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - output: - get_pattern_pars(config, "pht", check_in_cycle=check_in_cycle), - group: - "merge-hit" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input.infiles} " - "--output {output} " +build_merge_rules("pht", lh5_merge=False) rule build_pht: diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index 990f186..ab2e70f 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -29,125 +29,10 @@ Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) -rule build_pars_psp_objects: - input: - lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "psp", - basedir, - det_status, - chan_maps, - 
name="objects", - extension="pkl", - ), - output: - get_pattern_pars( - config, - "psp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - group: - "merge-psp" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input} " - "--output {output} " - "--channelmap {meta} " - - -rule build_plts_psp: - input: - lambda wildcards: get_plt_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "psp", - basedir, - det_status, - chan_maps, - ), - output: - get_pattern_plts(config, "psp"), - group: - "merge-psp" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input} " - "--output {output} " - "--channelmap {meta} " - - -rule build_pars_psp_db: - input: - lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "psp", - basedir, - det_status, - chan_maps, - ), - output: - temp( - get_pattern_pars_tmp( - config, - "psp", - datatype="cal", - ) - ), - group: - "merge-psp" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--input {input} " - "--output {output} " - "--channelmap {meta} " +include: "channel_merge.smk" -rule build_pars_psp: - input: - in_files=lambda wildcards: get_par_chanlist( - config, - f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - "dsp", - basedir, - det_status, - chan_maps, - name="dplms", - extension="lh5", - ), - in_db=get_pattern_pars_tmp( - config, - "psp", - datatype="cal", - ), - plts=get_pattern_plts(config, "psp"), - objects=get_pattern_pars( - config, - "psp", - name="objects", - extension="dir", - check_in_cycle=check_in_cycle, - ), - output: - out_file=get_pattern_pars( - config, - "psp", - extension="lh5", - check_in_cycle=check_in_cycle, - ), - out_db=get_pattern_pars(config, "psp", check_in_cycle=check_in_cycle), - group: - "merge-psp" - shell: - f'{execenv_smk_py_script(config, "merge_channels")}' - "--output {output.out_file} " - "--in_db {input.in_db} " - "--out_db {output.out_db} " - "--input {input.in_files} " - "--channelmap {meta} " +build_merge_rules("psp", lh5_merge=True) rule build_psp: From f323190f8322f18457f6301494cf639d6d2f4c9c Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 18:10:49 +0100 Subject: [PATCH 088/101] json to yaml tests --- tests/test_util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_util.py b/tests/test_util.py index 01f5ffb..4041614 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,6 +1,6 @@ -import json from pathlib import Path +import yaml from legenddataflow import ( FileKey, ParsKeyResolve, @@ -12,7 +12,7 @@ testprod = Path(__file__).parent / "dummy_cycle" with (testprod / "config.yaml").open() as r: - setup = json.load(r) + setup = yaml.safe_load(r) subst_vars(setup, var_values={"_": str(testprod)}) setup = setup["setups"]["test"] From 5002261710c48c96ec2346f86bbb273239f0de4d Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 20:19:17 +0100 Subject: [PATCH 089/101] fix wildcard constraint --- workflow/src/legenddataflow/cal_grouping.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/workflow/src/legenddataflow/cal_grouping.py b/workflow/src/legenddataflow/cal_grouping.py index 646791a..b2ce781 100644 --- a/workflow/src/legenddataflow/cal_grouping.py +++ b/workflow/src/legenddataflow/cal_grouping.py @@ -85,7 +85,7 @@ def 
get_par_files( ): all_par_files.append(par_file) if channel == "default": - channel = "{detector}" + channel = "{channel}" selected_par_files = [] for par_file in all_par_files: fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) @@ -138,7 +138,7 @@ def get_plt_files( ): all_par_files.append(par_file) if channel == "default": - channel = "{detector}" + channel = "{channel}" selected_par_files = [] for par_file in all_par_files: fk = ProcessingFileKey.get_filekey_from_pattern(Path(par_file).name) @@ -187,7 +187,7 @@ def get_log_file( if len(par_files) > 0: fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) if channel == "default": - fk.channel = "{detector}" + fk.channel = "{channel}" else: fk.channel = channel return fk.get_path_from_filekey( @@ -208,7 +208,6 @@ def get_timestamp( datatype=datatype, name=None, ) - if len(par_files) > 0: fk = ChannelProcKey.get_filekey_from_pattern(Path(par_files[0]).name) return fk.timestamp @@ -231,6 +230,6 @@ def get_wildcard_constraints(self, dataset, channel): out_string = "" for channel in exclude_chans: out_string += f"(?!{channel})" - return out_string + r"^[VPCB]\d{1}\w{5}$" + return out_string + r"[PCVB]{1}\d{1}\w{5}" else: - return r"^[VPCB]\d{1}\w{5}$" + return r"[PCVB]{1}\d{1}\w{5}" From 479acac5c7b0159b838e2b1e4f40772b5f5b27e2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 20:19:41 +0100 Subject: [PATCH 090/101] split out par rules --- workflow/Snakefile | 4 +- workflow/rules/channel_merge.smk | 12 +- workflow/rules/dsp.smk | 7 - workflow/rules/dsp_pars_geds.smk | 6 - workflow/rules/hit.smk | 200 +---- workflow/rules/hit_pars_geds.smk | 205 +++++ workflow/rules/pht.smk | 771 +----------------- workflow/rules/pht_pars_geds.smk | 768 +++++++++++++++++ .../{pht_fast.smk => pht_pars_geds_fast.smk} | 0 workflow/rules/psp.smk | 8 +- workflow/rules/psp_pars_geds.smk | 5 +- 11 files changed, 990 insertions(+), 996 deletions(-) create mode 100644 workflow/rules/hit_pars_geds.smk create mode 100644 workflow/rules/pht_pars_geds.smk rename workflow/rules/{pht_fast.smk => pht_pars_geds_fast.smk} (100%) diff --git a/workflow/Snakefile b/workflow/Snakefile index 7bc5c65..db7e3c3 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -58,9 +58,11 @@ include: "rules/dsp_pars_geds.smk" include: "rules/dsp.smk" include: "rules/psp_pars_geds.smk" include: "rules/psp.smk" +include: "rules/hit_pars_geds.smk" include: "rules/hit.smk" +include: "rules/pht_pars_geds.smk" +include: "rules/pht_pars_geds_fast.smk" include: "rules/pht.smk" -include: "rules/pht_fast.smk" include: "rules/ann.smk" include: "rules/evt.smk" include: "rules/skm.smk" diff --git a/workflow/rules/channel_merge.smk b/workflow/rules/channel_merge.smk index ef2b57e..b970840 100644 --- a/workflow/rules/channel_merge.smk +++ b/workflow/rules/channel_merge.smk @@ -10,7 +10,9 @@ from legenddataflow.utils import set_last_rule_name import inspect from legenddataflow.execenv import execenv_smk_py_script -def build_merge_rules(tier,lh5_merge=False): +def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): + if lh5_tier is None: + lh5_tier = tier rule: input: lambda wildcards: get_plt_chanlist( @@ -108,7 +110,7 @@ def build_merge_rules(tier,lh5_merge=False): in_files=lambda wildcards: get_par_chanlist( config, f"all-{wildcards.experiment}-{wildcards.period}-{wildcards.run}-cal-{wildcards.timestamp}-channels", - tier, + lh5_tier, basedir, det_status, chan_maps, @@ -116,13 +118,13 @@ def build_merge_rules(tier,lh5_merge=False): ), 
in_db=get_pattern_pars_tmp( config, - "dsp", + tier, datatype="cal", ) if lh5_merge is True else [], - plts=get_pattern_plts(config, "dsp"), + plts=get_pattern_plts(config, tier), objects=get_pattern_pars( config, - "dsp", + tier, name="objects", extension="dir", check_in_cycle=check_in_cycle, diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index 4683a7c..f296716 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -7,7 +7,6 @@ Snakemake rules for processing dsp tier. from legenddataflow.pars_loading import ParsCatalog from legenddataflow.create_pars_keylist import ParsKeyResolve from pathlib import Path -from legenddataflow.create_pars_keylist import ParsKeyResolve from legenddataflow.patterns import ( get_pattern_plts, get_pattern_tier, @@ -23,12 +22,6 @@ dsp_par_catalog = ParsKeyResolve.get_par_catalog( {"cal": ["par_dsp"], "lar": ["par_dsp"]}, ) -dsp_par_cat_file = Path(pars_path(config)) / "dsp" / "validity.yaml" -if dsp_par_cat_file.is_file(): - dsp_par_cat_file.unlink() -Path(dsp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) -ParsKeyResolve.write_to_yaml(dsp_par_catalog, dsp_par_cat_file) - include: "channel_merge.smk" diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index 98a5a55..86b8342 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -15,12 +15,6 @@ from legenddataflow.patterns import ( ) from legenddataflow.execenv import execenv_smk_py_script -dsp_par_catalog = ParsKeyResolve.get_par_catalog( - ["-*-*-*-cal"], - get_pattern_tier(config, "raw", check_in_cycle=False), - {"cal": ["par_dsp"], "lar": ["par_dsp"]}, -) - rule build_pars_dsp_tau_geds: input: diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index 0a2c6f6..5d83174 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -6,19 +6,13 @@ Snakemake rules for processing hit tier. 
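The include: "channel_merge.smk" plus build_merge_rules(tier, lh5_merge=...) calls above replace four near-identical merge rules per tier. A plain-Python analogue of that factory pattern, assuming the anonymous rule: blocks are renamed with set_last_rule_name as the import suggests; names and outputs below are placeholders:

def set_last_rule_name(registry, name):
    # stand-in for legenddataflow.utils.set_last_rule_name: rename the rule added last
    registry[-1] = (name, registry[-1][1])

def build_merge_rules(registry, tier, lh5_merge=False):
    registry.append(("<anonymous>", f"plt/{tier}"))
    set_last_rule_name(registry, f"build_plts_{tier}")
    registry.append(("<anonymous>", f"par/{tier}_objects"))
    set_last_rule_name(registry, f"build_pars_{tier}_objects")
    if lh5_merge:  # dsp/psp additionally merge per-channel lh5 pars and write a db file
        registry.append(("<anonymous>", f"par/{tier}.lh5"))
        set_last_rule_name(registry, f"build_pars_{tier}")

rules = []
for tier, lh5 in [("dsp", True), ("hit", False), ("pht", False), ("psp", True)]:
    build_merge_rules(rules, tier, lh5_merge=lh5)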
This is done in 4 steps: - running build hit over all channels using par file """ -from legenddataflow.pars_loading import ParsCatalog from legenddataflow.create_pars_keylist import ParsKeyResolve +from legenddataflow.pars_loading import ParsCatalog from pathlib import Path from legenddataflow.patterns import ( - get_pattern_pars_tmp_channel, - get_pattern_plts_tmp_channel, - get_pattern_log_channel, - get_pattern_pars, - get_pattern_plts, get_pattern_tier, - get_pattern_pars_tmp, get_pattern_log, - get_pattern_pars, + get_pattern_pars_tmp, ) from legenddataflow.execenv import execenv_smk_py_script @@ -28,196 +22,6 @@ hit_par_catalog = ParsKeyResolve.get_par_catalog( {"cal": ["par_hit"], "lar": ["par_hit"]}, ) -hit_par_cat_file = Path(pars_path(config)) / "hit" / "validity.yaml" -if hit_par_cat_file.is_file(): - hit_par_cat_file.unlink() -Path(hit_par_cat_file).parent.mkdir(parents=True, exist_ok=True) -ParsKeyResolve.write_to_yaml(hit_par_catalog, hit_par_cat_file) - - -# This rule builds the qc using the calibration dsp files and fft files -rule build_qc: - input: - files=os.path.join( - filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - fft_files=os.path.join( - filelist_path(config), "all-{experiment}-{period}-{run}-fft-dsp.filelist" - ), - pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - overwrite_files=lambda wildcards: get_overwrite_file("hit", wildcards), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - qc_file=temp(get_pattern_pars_tmp_channel(config, "hit", "qc")), - plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "qc")), - log: - get_pattern_log_channel(config, "pars_hit_qc", time), - group: - "par-hit" - resources: - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_hit_qc")}' - "--log {log} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--configs {configs} " - "--metadata {meta} " - "--plot_path {output.plot_file} " - "--save_path {output.qc_file} " - "--pulser_file {input.pulser} " - "--cal_files {input.files} " - "--fft_files {input.fft_files} " - "--overwrite_files {input.overwrite_files} " - - -# This rule builds the energy calibration using the calibration dsp files -rule build_energy_calibration: - input: - files=os.path.join( - filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - ctc_dict=ancient( - lambda wildcards: ParsCatalog.get_par_file( - config, wildcards.timestamp, "dsp" - ) - ), - inplots=get_pattern_plts_tmp_channel(config, "hit", "qc"), - in_hit_dict=get_pattern_pars_tmp_channel(config, "hit", "qc"), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - ecal_file=temp(get_pattern_pars_tmp_channel(config, "hit", "energy_cal")), - results_file=temp( - get_pattern_pars_tmp_channel( - config, "hit", "energy_cal_objects", extension="pkl" - ) - ), - plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "energy_cal")), - log: - get_pattern_log_channel(config, "pars_hit_energy_cal", time), - group: - "par-hit" - resources: - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' - "--log {log} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--configs {configs} " - "--metadata {meta} " - "--plot_path {output.plot_file} " - "--results_path {output.results_file} " - "--save_path 
{output.ecal_file} " - "--inplot_dict {input.inplots} " - "--in_hit_dict {input.in_hit_dict} " - "--ctc_dict {input.ctc_dict} " - "--pulser_file {input.pulser} " - "--files {input.files}" - - -# This rule builds the a/e calibration using the calibration dsp files -rule build_aoe_calibration: - input: - files=os.path.join( - filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(config, "hit", "energy_cal"), - eres_file=get_pattern_pars_tmp_channel( - config, "hit", "energy_cal_objects", extension="pkl" - ), - inplots=get_pattern_plts_tmp_channel(config, "hit", "energy_cal"), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(config, "hit", "aoe_cal")), - aoe_results=temp( - get_pattern_pars_tmp_channel( - config, "hit", "aoe_cal_objects", extension="pkl" - ) - ), - plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "aoe_cal")), - log: - get_pattern_log_channel(config, "pars_hit_aoe_cal", time), - group: - "par-hit" - resources: - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_hit_aoe")}' - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--aoe_results {output.aoe_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--pulser_file {input.pulser} " - "--ecal_file {input.ecal_file} " - "{input.files}" - - -# This rule builds the lq calibration using the calibration dsp files -rule build_lq_calibration: - input: - files=os.path.join( - filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" - ), - pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(config, "hit", "aoe_cal"), - eres_file=get_pattern_pars_tmp_channel( - config, "hit", "aoe_cal_objects", extension="pkl" - ), - inplots=get_pattern_plts_tmp_channel(config, "hit", "aoe_cal"), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(config, "hit")), - lq_results=temp( - get_pattern_pars_tmp_channel(config, "hit", "objects", extension="pkl") - ), - plot_file=temp(get_pattern_plts_tmp_channel(config, "hit")), - log: - get_pattern_log_channel(config, "pars_hit_lq_cal", time), - group: - "par-hit" - resources: - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_hit_lq")}' - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--lq_results {output.lq_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--pulser_file {input.pulser} " - "--ecal_file {input.ecal_file} " - "{input.files}" - include: "channel_merge.smk" diff --git a/workflow/rules/hit_pars_geds.smk b/workflow/rules/hit_pars_geds.smk new file mode 100644 index 0000000..8143f82 --- /dev/null +++ b/workflow/rules/hit_pars_geds.smk @@ -0,0 +1,205 @@ +""" +Snakemake rules for processing hit tier. 
This is done in 4 steps: +- extraction of calibration curves(s) for each channel from cal data +- extraction of psd calibration parameters for each channel from cal data +- combining of all channels into single pars files with associated plot and results files +- running build hit over all channels using par file +""" + +from pathlib import Path +from legenddataflow.patterns import ( + get_pattern_pars_tmp_channel, + get_pattern_plts_tmp_channel, + get_pattern_log_channel, + get_pattern_pars, + get_pattern_plts, + get_pattern_tier, + get_pattern_pars_tmp, + get_pattern_log, + get_pattern_pars, +) +from legenddataflow.execenv import execenv_smk_py_script + + +# This rule builds the qc using the calibration dsp files and fft files +rule build_qc: + input: + files=os.path.join( + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + ), + fft_files=os.path.join( + filelist_path(config), "all-{experiment}-{period}-{run}-fft-dsp.filelist" + ), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + overwrite_files=lambda wildcards: get_overwrite_file("hit", wildcards), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + qc_file=temp(get_pattern_pars_tmp_channel(config, "hit", "qc")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "qc")), + log: + get_pattern_log_channel(config, "pars_hit_qc", time), + group: + "par-hit" + resources: + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_hit_qc")}' + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--configs {configs} " + "--metadata {meta} " + "--plot_path {output.plot_file} " + "--save_path {output.qc_file} " + "--pulser_file {input.pulser} " + "--cal_files {input.files} " + "--fft_files {input.fft_files} " + "--overwrite_files {input.overwrite_files} " + + +# This rule builds the energy calibration using the calibration dsp files +rule build_energy_calibration: + input: + files=os.path.join( + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + ), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ctc_dict=ancient( + lambda wildcards: ParsCatalog.get_par_file( + config, wildcards.timestamp, "dsp" + ) + ), + inplots=get_pattern_plts_tmp_channel(config, "hit", "qc"), + in_hit_dict=get_pattern_pars_tmp_channel(config, "hit", "qc"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + ecal_file=temp(get_pattern_pars_tmp_channel(config, "hit", "energy_cal")), + results_file=temp( + get_pattern_pars_tmp_channel( + config, "hit", "energy_cal_objects", extension="pkl" + ) + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "energy_cal")), + log: + get_pattern_log_channel(config, "pars_hit_energy_cal", time), + group: + "par-hit" + resources: + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--configs {configs} " + "--metadata {meta} " + "--plot_path {output.plot_file} " + "--results_path {output.results_file} " + "--save_path {output.ecal_file} " + "--inplot_dict {input.inplots} " + "--in_hit_dict {input.in_hit_dict} " + "--ctc_dict {input.ctc_dict} " + "--pulser_file {input.pulser} " + "--files {input.files}" + + +# This rule builds the a/e calibration using the calibration dsp files +rule build_aoe_calibration: + input: + 
files=os.path.join( + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + ), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "hit", "energy_cal"), + eres_file=get_pattern_pars_tmp_channel( + config, "hit", "energy_cal_objects", extension="pkl" + ), + inplots=get_pattern_plts_tmp_channel(config, "hit", "energy_cal"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "hit", "aoe_cal")), + aoe_results=temp( + get_pattern_pars_tmp_channel( + config, "hit", "aoe_cal_objects", extension="pkl" + ) + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit", "aoe_cal")), + log: + get_pattern_log_channel(config, "pars_hit_aoe_cal", time), + group: + "par-hit" + resources: + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_hit_aoe")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--aoe_results {output.aoe_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--pulser_file {input.pulser} " + "--ecal_file {input.ecal_file} " + "{input.files}" + + +# This rule builds the lq calibration using the calibration dsp files +rule build_lq_calibration: + input: + files=os.path.join( + filelist_path(config), "all-{experiment}-{period}-{run}-cal-dsp.filelist" + ), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "hit", "aoe_cal"), + eres_file=get_pattern_pars_tmp_channel( + config, "hit", "aoe_cal_objects", extension="pkl" + ), + inplots=get_pattern_plts_tmp_channel(config, "hit", "aoe_cal"), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "hit")), + lq_results=temp( + get_pattern_pars_tmp_channel(config, "hit", "objects", extension="pkl") + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "hit")), + log: + get_pattern_log_channel(config, "pars_hit_lq_cal", time), + group: + "par-hit" + resources: + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_hit_lq")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--lq_results {output.lq_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--pulser_file {input.pulser} " + "--ecal_file {input.ecal_file} " + "{input.files}" diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index bab3de7..fa85971 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -6,19 +6,14 @@ Snakemake rules for processing pht (partition hit) tier data. 
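Each of these rules shells out to one of the console scripts registered in pyproject.toml earlier in the series, passing a common set of flags (--log, --configs, --metadata, --datatype, --timestamp, --channel) plus stage-specific inputs and outputs. A hedged argparse skeleton of what such an entry point is expected to accept, inferred only from the flags in the shell blocks; the real scripts under legenddataflow.scripts.par.geds.hit may differ in detail:

import argparse

def par_geds_hit_qc():
    p = argparse.ArgumentParser()
    for flag in ("--log", "--configs", "--metadata", "--datatype",
                 "--timestamp", "--channel", "--plot_path", "--save_path",
                 "--pulser_file", "--overwrite_files"):
        p.add_argument(flag)
    p.add_argument("--cal_files", nargs="*")
    p.add_argument("--fft_files", nargs="*")
    args = p.parse_args()
    # ... run the QC routines, then write args.save_path and args.plot_path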
This is done in 4 - running build hit over all channels using par file """ -from legenddataflow.pars_loading import ParsCatalog from legenddataflow.create_pars_keylist import ParsKeyResolve +from legenddataflow.pars_loading import ParsCatalog from pathlib import Path from legenddataflow.utils import filelist_path, set_last_rule_name from legenddataflow.patterns import ( - get_pattern_pars_tmp_channel, - get_pattern_plts_tmp_channel, - get_pattern_log_channel, - get_pattern_plts, get_pattern_tier, get_pattern_pars_tmp, get_pattern_log, - get_pattern_pars, ) from legenddataflow.execenv import execenv_smk_py_script @@ -28,773 +23,9 @@ pht_par_catalog = ParsKeyResolve.get_par_catalog( {"cal": ["par_pht"], "lar": ["par_pht"]}, ) -pht_par_cat_file = Path(pars_path(config)) / "pht" / "validity.yaml" -if pht_par_cat_file.is_file(): - pht_par_cat_file.unlink() -Path(pht_par_cat_file).parent.mkdir(parents=True, exist_ok=True) -ParsKeyResolve.write_to_yaml(pht_par_catalog, pht_par_cat_file) - intier = "psp" -rule pht_checkpoint: - input: - files=os.path.join( - filelist_path(config), - "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", - ), - output: - temp(get_pattern_pars_tmp_channel(config, "pht", "check")), - shell: - "touch {output}" - - -qc_pht_rules = {} -for key, dataset in part.datasets.items(): - for partition in dataset.keys(): - - rule: - input: - cal_files=part.get_filelists(partition, key, intier), - fft_files=part.get_filelists(partition, key, intier, datatype="fft"), - pulser_files=[ - str(file).replace("par_pht", "par_tcm") - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="pulser_ids", - ) - ], - check_files=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="check", - ), - overwrite_files=get_overwrite_file( - "pht", - timestamp=part.get_timestamp( - pht_par_catalog, - partition, - key, - tier="pht", - ), - ), - wildcard_constraints: - channel=part.get_wildcard_constraints(partition, key), - params: - datatype="cal", - channel="{channel}" if key == "default" else key, - timestamp=part.get_timestamp( - pht_par_catalog, partition, key, tier="pht" - ), - output: - hit_pars=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="qc", - ) - ], - plot_file=[ - temp(file) - for file in part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="qc", - ) - ], - log: - part.get_log_file( - pht_par_catalog, - partition, - key, - "pht", - time, - name="par_pht_qc", - ), - group: - "par-pht" - resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 30, - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--save_path {output.hit_pars} " - "--plot_path {output.plot_file} " - "--overwrite_files {input.overwrite_files} " - "--pulser_files {input.pulser_files} " - "--fft_files {input.fft_files} " - "--cal_files {input.cal_files}" - - set_last_rule_name(workflow, f"{key}-{partition}-build_pht_qc") - - if key in qc_pht_rules: - qc_pht_rules[key].append(list(workflow.rules)[-1]) - else: - qc_pht_rules[key] = [list(workflow.rules)[-1]] - - -# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs -# This rule builds the a/e calibration using the calibration dsp files for the whole partition 
-rule build_pht_qc: - input: - cal_files=os.path.join( - filelist_path(config), - "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", - ), - fft_files=os.path.join( - filelist_path(config), - "all-{experiment}-{period}-{run}-fft-" + f"{intier}.filelist", - ), - pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - check_file=get_pattern_pars_tmp_channel(config, "pht", "check"), - overwrite_files=lambda wildcards: get_overwrite_file("pht", wildcards=wildcards), - params: - datatype="cal", - channel="{channel}", - timestamp="{timestamp}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "qc")), - plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "qc")), - log: - get_pattern_log_channel(config, "par_pht_qc", time), - group: - "par-pht" - resources: - mem_swap=60, - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--save_path {output.hit_pars} " - "--plot_path {output.plot_file} " - "--overwrite_files {input.overwrite_files} " - "--pulser_files {input.pulser_files} " - "--fft_files {input.fft_files} " - "--cal_files {input.cal_files}" - - -fallback_qc_rule = list(workflow.rules)[-1] - -rule_order_list = [] -ordered = OrderedDict(qc_pht_rules) -ordered.move_to_end("default") -for key, items in ordered.items(): - rule_order_list += [item.name for item in items] -rule_order_list.append(fallback_qc_rule.name) -workflow._ruleorder.add(*rule_order_list) # [::-1] - - -# This rule builds the energy calibration using the calibration dsp files -rule build_per_energy_calibration: - input: - files=os.path.join( - filelist_path(config), - "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", - ), - pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - pht_dict=get_pattern_pars_tmp_channel(config, "pht", "qc"), - inplots=get_pattern_plts_tmp_channel(config, "pht", "qc"), - ctc_dict=ancient( - lambda wildcards: ParsCatalog.get_par_file( - config, wildcards.timestamp, intier - ) - ), - params: - timestamp="{timestamp}", - datatype="cal", - channel="{channel}", - tier="pht", - output: - ecal_file=temp(get_pattern_pars_tmp_channel(config, "pht", "energy_cal")), - results_file=temp( - get_pattern_pars_tmp_channel( - config, "pht", "energy_cal_objects", extension="pkl" - ) - ), - plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "energy_cal")), - log: - get_pattern_log_channel(config, "par_pht_energy_cal", time), - group: - "par-pht" - resources: - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' - "--log {log} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--configs {configs} " - "--tier {params.tier} " - "--metadata {meta} " - "--plot_path {output.plot_file} " - "--results_path {output.results_file} " - "--save_path {output.ecal_file} " - "--inplot_dict {input.inplots} " - "--in_hit_dict {input.pht_dict} " - "--ctc_dict {input.ctc_dict} " - "--pulser_file {input.pulser} " - "--files {input.files}" - - -part_pht_rules = {} -for key, dataset in part.datasets.items(): - for partition in dataset.keys(): - - rule: - input: - files=part.get_filelists(partition, key, intier), - pulser_files=[ - str(file).replace("par_pht", "par_tcm") - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="pulser_ids", - 
) - ], - ecal_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="energy_cal", - ), - eres_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="energy_cal_objects", - extension="pkl", - ), - inplots=part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="energy_cal", - ), - wildcard_constraints: - channel=part.get_wildcard_constraints(partition, key), - params: - datatype="cal", - channel="{channel}" if key == "default" else key, - timestamp=part.get_timestamp( - pht_par_catalog, partition, key, tier="pht" - ), - output: - hit_pars=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal", - ) - ], - partcal_results=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal_objects", - extension="pkl", - ) - ], - plot_file=[ - temp(file) - for file in part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal", - ) - ], - log: - part.get_log_file( - pht_par_catalog, - partition, - key, - "pht", - time, - name="par_pht_partcal", - ), - group: - "par-pht" - resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 15, - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' - "--log {log} " - "--configs {configs} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--metadata {meta} " - "--fit_results {output.partcal_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" - - set_last_rule_name( - workflow, f"{key}-{partition}-build_pht_energy_super_calibrations" - ) - - if key in part_pht_rules: - part_pht_rules[key].append(list(workflow.rules)[-1]) - else: - part_pht_rules[key] = [list(workflow.rules)[-1]] - - -# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs -# This rule builds the a/e calibration using the calibration dsp files for the whole partition -rule build_pht_energy_super_calibrations: - input: - files=os.path.join( - filelist_path(config), - "all-{experiment}-{period}-{run}-cal" + f"-{intier}.filelist", - ), - pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(config, "pht", "energy_cal"), - eres_file=get_pattern_pars_tmp_channel( - config, "pht", "energy_cal_objects", extension="pkl" - ), - inplots=get_pattern_plts_tmp_channel(config, "pht", "energy_cal"), - params: - datatype="cal", - channel="{channel}", - timestamp="{timestamp}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "partcal")), - partcal_results=temp( - get_pattern_pars_tmp_channel( - config, "pht", "partcal_objects", extension="pkl" - ) - ), - plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "partcal")), - log: - get_pattern_log_channel(config, "par_pht_partcal", time), - group: - "par-pht" - resources: - mem_swap=60, - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' - "--log {log} " - "--configs {configs} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--channel {params.channel} " - "--metadata {meta} " - "--inplots {input.inplots} " - "--fit_results {output.partcal_results} " 
- "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" - - -fallback_pht_rule = list(workflow.rules)[-1] - -rule_order_list = [] -ordered = OrderedDict(part_pht_rules) -ordered.move_to_end("default") -for key, items in ordered.items(): - rule_order_list += [item.name for item in items] -rule_order_list.append(fallback_pht_rule.name) -workflow._ruleorder.add(*rule_order_list) # [::-1] - -part_pht_rules = {} -for key, dataset in part.datasets.items(): - for partition in dataset.keys(): - - rule: - input: - files=part.get_filelists(partition, key, intier), - pulser_files=[ - str(file).replace("par_pht", "par_tcm") - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="pulser_ids", - ) - ], - ecal_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal", - ), - eres_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal_objects", - extension="pkl", - ), - inplots=part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="partcal", - ), - wildcard_constraints: - channel=part.get_wildcard_constraints(partition, key), - params: - datatype="cal", - channel="{channel}" if key == "default" else key, - timestamp=part.get_timestamp( - pht_par_catalog, partition, key, tier="pht" - ), - output: - hit_pars=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal", - ) - ], - aoe_results=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal_objects", - extension="pkl", - ) - ], - plot_file=[ - temp(file) - for file in part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal", - ) - ], - log: - part.get_log_file( - pht_par_catalog, - partition, - key, - "pht", - time, - name="par_pht_aoe", - ), - group: - "par-pht" - resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 15, - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--aoe_results {output.aoe_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" - - set_last_rule_name( - workflow, f"{key}-{partition}-build_pht_aoe_calibrations" - ) - - if key in part_pht_rules: - part_pht_rules[key].append(list(workflow.rules)[-1]) - else: - part_pht_rules[key] = [list(workflow.rules)[-1]] - - -# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs -# This rule builds the a/e calibration using the calibration dsp files for the whole partition -rule build_pht_aoe_calibrations: - input: - files=os.path.join( - filelist_path(config), - "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", - ), - pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(config, "pht", "partcal"), - eres_file=get_pattern_pars_tmp_channel( - config, "pht", "partcal_objects", extension="pkl" - ), - 
inplots=get_pattern_plts_tmp_channel(config, "pht", "partcal"), - params: - datatype="cal", - channel="{channel}", - timestamp="{timestamp}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "aoecal")), - aoe_results=temp( - get_pattern_pars_tmp_channel( - config, "pht", "aoecal_objects", extension="pkl" - ) - ), - plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "aoecal")), - log: - get_pattern_log_channel(config, "par_pht_aoe_cal", time), - group: - "par-pht" - resources: - mem_swap=60, - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--aoe_results {output.aoe_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" - - -fallback_pht_rule = list(workflow.rules)[-1] - -rule_order_list = [] -ordered = OrderedDict(part_pht_rules) -ordered.move_to_end("default") -for key, items in ordered.items(): - rule_order_list += [item.name for item in items] -rule_order_list.append(fallback_pht_rule.name) -workflow._ruleorder.add(*rule_order_list) # [::-1] - -part_pht_rules = {} -for key, dataset in part.datasets.items(): - for partition in dataset.keys(): - - rule: - input: - files=part.get_filelists(partition, key, intier), - pulser_files=[ - str(file).replace("par_pht", "par_tcm") - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="pulser_ids", - ) - ], - ecal_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal", - ), - eres_file=part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal_objects", - extension="pkl", - ), - inplots=part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="aoecal", - ), - wildcard_constraints: - channel=part.get_wildcard_constraints(partition, key), - params: - datatype="cal", - channel="{channel}" if key == "default" else key, - timestamp=part.get_timestamp( - pht_par_catalog, partition, key, tier="pht" - ), - output: - hit_pars=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - ) - ], - lq_results=[ - temp(file) - for file in part.get_par_files( - pht_par_catalog, - partition, - key, - tier="pht", - name="objects", - extension="pkl", - ) - ], - plot_file=[ - temp(file) - for file in part.get_plt_files( - pht_par_catalog, - partition, - key, - tier="pht", - ) - ], - log: - part.get_log_file( - pht_par_catalog, - partition, - key, - "pht", - time, - name="par_pht_lq", - ), - group: - "par-pht" - resources: - mem_swap=len(part.get_filelists(partition, key, intier)) * 15, - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--lq_results {output.lq_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" - - set_last_rule_name(workflow, 
f"{key}-{partition}-build_pht_lq_calibration") - - if key in part_pht_rules: - part_pht_rules[key].append(list(workflow.rules)[-1]) - else: - part_pht_rules[key] = [list(workflow.rules)[-1]] - - -# This rule builds the lq calibration using the calibration dsp files for the whole partition -rule build_pht_lq_calibration: - input: - files=os.path.join( - filelist_path(config), - "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", - ), - pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), - ecal_file=get_pattern_pars_tmp_channel(config, "pht", "aoecal"), - eres_file=get_pattern_pars_tmp_channel( - config, "pht", "aoecal_objects", extension="pkl" - ), - inplots=get_pattern_plts_tmp_channel(config, "pht", "aoecal"), - params: - datatype="cal", - channel="{channel}", - timestamp="{timestamp}", - output: - hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht")), - lq_results=temp( - get_pattern_pars_tmp_channel(config, "pht", "objects", extension="pkl") - ), - plot_file=temp(get_pattern_plts_tmp_channel(config, "pht")), - log: - get_pattern_log_channel(config, "par_pht_lq_cal", time), - group: - "par-pht" - resources: - mem_swap=60, - runtime=300, - shell: - f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' - "--log {log} " - "--configs {configs} " - "--metadata {meta} " - "--datatype {params.datatype} " - "--timestamp {params.timestamp} " - "--inplots {input.inplots} " - "--channel {params.channel} " - "--lq_results {output.lq_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" - - -fallback_pht_rule = list(workflow.rules)[-1] - -rule_order_list = [] -ordered = OrderedDict(part_pht_rules) -ordered.move_to_end("default") -for key, items in ordered.items(): - rule_order_list += [item.name for item in items] -rule_order_list.append(fallback_pht_rule.name) -workflow._ruleorder.add(*rule_order_list) # [::-1] - - include: "channel_merge.smk" diff --git a/workflow/rules/pht_pars_geds.smk b/workflow/rules/pht_pars_geds.smk new file mode 100644 index 0000000..4e5e126 --- /dev/null +++ b/workflow/rules/pht_pars_geds.smk @@ -0,0 +1,768 @@ +""" +Snakemake rules for processing pht (partition hit) tier data. 
This is done in 4 steps: +- extraction of calibration curves(s) for each run for each channel from cal data +- extraction of psd calibration parameters and partition level energy fitting for each channel over whole partition from cal data +- combining of all channels into single pars files with associated plot and results files +- running build hit over all channels using par file +""" + +from legenddataflow.pars_loading import ParsCatalog +from legenddataflow.create_pars_keylist import ParsKeyResolve +from pathlib import Path +from legenddataflow.utils import filelist_path, set_last_rule_name +from legenddataflow.patterns import ( + get_pattern_pars_tmp_channel, + get_pattern_plts_tmp_channel, + get_pattern_log_channel, + get_pattern_plts, + get_pattern_tier, + get_pattern_pars_tmp, + get_pattern_log, + get_pattern_pars, +) +from legenddataflow.execenv import execenv_smk_py_script + +pht_par_catalog = ParsKeyResolve.get_par_catalog( + ["-*-*-*-cal"], + get_pattern_tier(config, "raw", check_in_cycle=False), + {"cal": ["par_pht"], "lar": ["par_pht"]}, +) + +intier = "psp" + +qc_pht_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + cal_files=part.get_filelists(partition, key, intier), + fft_files=part.get_filelists(partition, key, intier, datatype="fft"), + pulser_files=[ + str(file).replace("par_pht", "par_tcm") + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], + overwrite_files=get_overwrite_file( + "pht", + timestamp=part.get_timestamp( + pht_par_catalog, + partition, + key, + tier="pht", + ), + ), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + pht_par_catalog, partition, key, tier="pht" + ), + output: + hit_pars=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="qc", + ) + ], + plot_file=[ + temp(file) + for file in part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="qc", + ) + ], + log: + part.get_log_file( + pht_par_catalog, + partition, + key, + "pht", + time, + name="par_pht_qc", + ), + group: + "par-pht" + resources: + mem_swap=len(part.get_filelists(partition, key, intier)) * 30, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--save_path {output.hit_pars} " + "--plot_path {output.plot_file} " + "--overwrite_files {input.overwrite_files} " + "--pulser_files {input.pulser_files} " + "--fft_files {input.fft_files} " + "--cal_files {input.cal_files}" + + set_last_rule_name(workflow, f"{key}-{partition}-build_pht_qc") + + if key in qc_pht_rules: + qc_pht_rules[key].append(list(workflow.rules)[-1]) + else: + qc_pht_rules[key] = [list(workflow.rules)[-1]] + + +# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs +# This rule builds the a/e calibration using the calibration dsp files for the whole partition +rule build_pht_qc: + input: + cal_files=os.path.join( + filelist_path(config), + "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", + ), + fft_files=os.path.join( + filelist_path(config), + "all-{experiment}-{period}-{run}-fft-" + f"{intier}.filelist", + ), + 
pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + overwrite_files=lambda wildcards: get_overwrite_file("pht", wildcards=wildcards), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "qc")), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "qc")), + log: + get_pattern_log_channel(config, "par_pht_qc", time), + group: + "par-pht" + resources: + mem_swap=60, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--save_path {output.hit_pars} " + "--plot_path {output.plot_file} " + "--overwrite_files {input.overwrite_files} " + "--pulser_files {input.pulser_files} " + "--fft_files {input.fft_files} " + "--cal_files {input.cal_files}" + + +fallback_qc_rule = list(workflow.rules)[-1] + +rule_order_list = [] +ordered = OrderedDict(qc_pht_rules) +ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_qc_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] + + +# This rule builds the energy calibration using the calibration dsp files +rule build_per_energy_calibration: + input: + files=os.path.join( + filelist_path(config), + "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", + ), + pulser=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + pht_dict=get_pattern_pars_tmp_channel(config, "pht", "qc"), + inplots=get_pattern_plts_tmp_channel(config, "pht", "qc"), + ctc_dict=ancient( + lambda wildcards: ParsCatalog.get_par_file( + config, wildcards.timestamp, intier + ) + ), + params: + timestamp="{timestamp}", + datatype="cal", + channel="{channel}", + tier="pht", + output: + ecal_file=temp(get_pattern_pars_tmp_channel(config, "pht", "energy_cal")), + results_file=temp( + get_pattern_pars_tmp_channel( + config, "pht", "energy_cal_objects", extension="pkl" + ) + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "energy_cal")), + log: + get_pattern_log_channel(config, "par_pht_energy_cal", time), + group: + "par-pht" + resources: + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' + "--log {log} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--configs {configs} " + "--tier {params.tier} " + "--metadata {meta} " + "--plot_path {output.plot_file} " + "--results_path {output.results_file} " + "--save_path {output.ecal_file} " + "--inplot_dict {input.inplots} " + "--in_hit_dict {input.pht_dict} " + "--ctc_dict {input.ctc_dict} " + "--pulser_file {input.pulser} " + "--files {input.files}" + + +part_pht_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + files=part.get_filelists(partition, key, intier), + pulser_files=[ + str(file).replace("par_pht", "par_tcm") + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], + ecal_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="energy_cal", + ), + eres_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="energy_cal_objects", + extension="pkl", + ), + inplots=part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + 
name="energy_cal", + ), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + pht_par_catalog, partition, key, tier="pht" + ), + output: + hit_pars=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal", + ) + ], + partcal_results=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal_objects", + extension="pkl", + ) + ], + plot_file=[ + temp(file) + for file in part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal", + ) + ], + log: + part.get_log_file( + pht_par_catalog, + partition, + key, + "pht", + time, + name="par_pht_partcal", + ), + group: + "par-pht" + resources: + mem_swap=len(part.get_filelists(partition, key, intier)) * 15, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--metadata {meta} " + "--fit_results {output.partcal_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + set_last_rule_name( + workflow, f"{key}-{partition}-build_pht_energy_super_calibrations" + ) + + if key in part_pht_rules: + part_pht_rules[key].append(list(workflow.rules)[-1]) + else: + part_pht_rules[key] = [list(workflow.rules)[-1]] + + +# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs +# This rule builds the a/e calibration using the calibration dsp files for the whole partition +rule build_pht_energy_super_calibrations: + input: + files=os.path.join( + filelist_path(config), + "all-{experiment}-{period}-{run}-cal" + f"-{intier}.filelist", + ), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "energy_cal"), + eres_file=get_pattern_pars_tmp_channel( + config, "pht", "energy_cal_objects", extension="pkl" + ), + inplots=get_pattern_plts_tmp_channel(config, "pht", "energy_cal"), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "partcal")), + partcal_results=temp( + get_pattern_pars_tmp_channel( + config, "pht", "partcal_objects", extension="pkl" + ) + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "partcal")), + log: + get_pattern_log_channel(config, "par_pht_partcal", time), + group: + "par-pht" + resources: + mem_swap=60, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' + "--log {log} " + "--configs {configs} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--channel {params.channel} " + "--metadata {meta} " + "--inplots {input.inplots} " + "--fit_results {output.partcal_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + +fallback_pht_rule = list(workflow.rules)[-1] + +rule_order_list = [] +ordered = OrderedDict(part_pht_rules) 
+ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_pht_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] + +part_pht_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + files=part.get_filelists(partition, key, intier), + pulser_files=[ + str(file).replace("par_pht", "par_tcm") + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], + ecal_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal", + ), + eres_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal_objects", + extension="pkl", + ), + inplots=part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="partcal", + ), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + pht_par_catalog, partition, key, tier="pht" + ), + output: + hit_pars=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal", + ) + ], + aoe_results=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal_objects", + extension="pkl", + ) + ], + plot_file=[ + temp(file) + for file in part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal", + ) + ], + log: + part.get_log_file( + pht_par_catalog, + partition, + key, + "pht", + time, + name="par_pht_aoe", + ), + group: + "par-pht" + resources: + mem_swap=len(part.get_filelists(partition, key, intier)) * 15, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--aoe_results {output.aoe_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + set_last_rule_name( + workflow, f"{key}-{partition}-build_pht_aoe_calibrations" + ) + + if key in part_pht_rules: + part_pht_rules[key].append(list(workflow.rules)[-1]) + else: + part_pht_rules[key] = [list(workflow.rules)[-1]] + + +# Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs +# This rule builds the a/e calibration using the calibration dsp files for the whole partition +rule build_pht_aoe_calibrations: + input: + files=os.path.join( + filelist_path(config), + "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", + ), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "partcal"), + eres_file=get_pattern_pars_tmp_channel( + config, "pht", "partcal_objects", extension="pkl" + ), + inplots=get_pattern_plts_tmp_channel(config, "pht", "partcal"), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht", "aoecal")), + aoe_results=temp( + get_pattern_pars_tmp_channel( + config, "pht", "aoecal_objects", extension="pkl" + ) + ), + 
plot_file=temp(get_pattern_plts_tmp_channel(config, "pht", "aoecal")), + log: + get_pattern_log_channel(config, "par_pht_aoe_cal", time), + group: + "par-pht" + resources: + mem_swap=60, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--aoe_results {output.aoe_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + +fallback_pht_rule = list(workflow.rules)[-1] + +rule_order_list = [] +ordered = OrderedDict(part_pht_rules) +ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_pht_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] + +part_pht_rules = {} +for key, dataset in part.datasets.items(): + for partition in dataset.keys(): + + rule: + input: + files=part.get_filelists(partition, key, intier), + pulser_files=[ + str(file).replace("par_pht", "par_tcm") + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="pulser_ids", + ) + ], + ecal_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal", + ), + eres_file=part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal_objects", + extension="pkl", + ), + inplots=part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="aoecal", + ), + wildcard_constraints: + channel=part.get_wildcard_constraints(partition, key), + params: + datatype="cal", + channel="{channel}" if key == "default" else key, + timestamp=part.get_timestamp( + pht_par_catalog, partition, key, tier="pht" + ), + output: + hit_pars=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + ) + ], + lq_results=[ + temp(file) + for file in part.get_par_files( + pht_par_catalog, + partition, + key, + tier="pht", + name="objects", + extension="pkl", + ) + ], + plot_file=[ + temp(file) + for file in part.get_plt_files( + pht_par_catalog, + partition, + key, + tier="pht", + ) + ], + log: + part.get_log_file( + pht_par_catalog, + partition, + key, + "pht", + time, + name="par_pht_lq", + ), + group: + "par-pht" + resources: + mem_swap=len(part.get_filelists(partition, key, intier)) * 15, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--lq_results {output.lq_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + set_last_rule_name(workflow, f"{key}-{partition}-build_pht_lq_calibration") + + if key in part_pht_rules: + part_pht_rules[key].append(list(workflow.rules)[-1]) + else: + part_pht_rules[key] = [list(workflow.rules)[-1]] + + +# This rule builds the lq calibration using the calibration dsp files for the whole partition +rule build_pht_lq_calibration: + input: + files=os.path.join( + filelist_path(config), 
+ "all-{experiment}-{period}-{run}-cal-" + f"{intier}.filelist", + ), + pulser_files=get_pattern_pars_tmp_channel(config, "tcm", "pulser_ids"), + ecal_file=get_pattern_pars_tmp_channel(config, "pht", "aoecal"), + eres_file=get_pattern_pars_tmp_channel( + config, "pht", "aoecal_objects", extension="pkl" + ), + inplots=get_pattern_plts_tmp_channel(config, "pht", "aoecal"), + params: + datatype="cal", + channel="{channel}", + timestamp="{timestamp}", + output: + hit_pars=temp(get_pattern_pars_tmp_channel(config, "pht")), + lq_results=temp( + get_pattern_pars_tmp_channel(config, "pht", "objects", extension="pkl") + ), + plot_file=temp(get_pattern_plts_tmp_channel(config, "pht")), + log: + get_pattern_log_channel(config, "par_pht_lq_cal", time), + group: + "par-pht" + resources: + mem_swap=60, + runtime=300, + shell: + f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' + "--log {log} " + "--configs {configs} " + "--metadata {meta} " + "--datatype {params.datatype} " + "--timestamp {params.timestamp} " + "--inplots {input.inplots} " + "--channel {params.channel} " + "--lq_results {output.lq_results} " + "--eres_file {input.eres_file} " + "--hit_pars {output.hit_pars} " + "--plot_file {output.plot_file} " + "--ecal_file {input.ecal_file} " + "--pulser_files {input.pulser_files} " + "--input_files {input.files}" + + +fallback_pht_rule = list(workflow.rules)[-1] + +rule_order_list = [] +ordered = OrderedDict(part_pht_rules) +ordered.move_to_end("default") +for key, items in ordered.items(): + rule_order_list += [item.name for item in items] +rule_order_list.append(fallback_pht_rule.name) +workflow._ruleorder.add(*rule_order_list) # [::-1] diff --git a/workflow/rules/pht_fast.smk b/workflow/rules/pht_pars_geds_fast.smk similarity index 100% rename from workflow/rules/pht_fast.smk rename to workflow/rules/pht_pars_geds_fast.smk diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index ab2e70f..e264ca4 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -22,17 +22,11 @@ psp_par_catalog = ParsKeyResolve.get_par_catalog( {"cal": ["par_psp"], "lar": ["par_psp"]}, ) -psp_par_cat_file = Path(pars_path(config)) / "psp" / "validity.yaml" -if psp_par_cat_file.is_file(): - psp_par_cat_file.unlink() -Path(psp_par_cat_file).parent.mkdir(parents=True, exist_ok=True) -ParsKeyResolve.write_to_yaml(psp_par_catalog, psp_par_cat_file) - include: "channel_merge.smk" -build_merge_rules("psp", lh5_merge=True) +build_merge_rules("psp", lh5_merge=True, lh5_tier="dsp") rule build_psp: diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk index 10d9ab1..8d53220 100644 --- a/workflow/rules/psp_pars_geds.smk +++ b/workflow/rules/psp_pars_geds.smk @@ -4,8 +4,8 @@ Snakemake rules for processing psp (partition dsp) tier data. 
- extraction of psd calibration parameters and partition level energy fitting for each channel over whole partition from cal data """ -from legenddataflow.create_pars_keylist import ParsKeyResolve from legenddataflow.utils import set_last_rule_name +from legenddataflow.create_pars_keylist import ParsKeyResolve from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, @@ -22,6 +22,7 @@ psp_par_catalog = ParsKeyResolve.get_par_catalog( {"cal": ["par_psp"], "lar": ["par_psp"]}, ) + psp_rules = {} for key, dataset in part.datasets.items(): for partition in dataset.keys(): @@ -119,7 +120,7 @@ for key, dataset in part.datasets.items(): # Merged energy and a/e supercalibrations to reduce number of rules as they have same inputs/outputs # This rule builds the a/e calibration using the calibration dsp files for the whole partition -rule build_par_psp: +rule build_par_psp_fallback: input: dsp_pars=get_pattern_pars_tmp_channel(config, "dsp", "eopt"), dsp_objs=get_pattern_pars_tmp_channel(config, "dsp", "objects", extension="pkl"), From 4dcdf97cd2f4834d6f445996a51dc8d8daf12898 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 20:20:14 +0100 Subject: [PATCH 091/101] test try 2 --- tests/test_util.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_util.py b/tests/test_util.py index 4041614..53b1b00 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -14,7 +14,6 @@ with (testprod / "config.yaml").open() as r: setup = yaml.safe_load(r) subst_vars(setup, var_values={"_": str(testprod)}) -setup = setup["setups"]["test"] def test_util(): From b58601d75bc28216b414bc696f7eb55c96e5f08e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 4 Feb 2025 20:39:06 +0100 Subject: [PATCH 092/101] tests v3 --- tests/dummy_cycle/config.yaml | 2 +- tests/test_util.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/dummy_cycle/config.yaml b/tests/dummy_cycle/config.yaml index a40938d..97de306 100644 --- a/tests/dummy_cycle/config.yaml +++ b/tests/dummy_cycle/config.yaml @@ -1,6 +1,6 @@ paths: sandbox_path: "" - tier_daq: $_/generated/tier/daq + tier_daq: $_/input_data/tier/daq tier_raw_blind: "" workflow: $_/workflow diff --git a/tests/test_util.py b/tests/test_util.py index 53b1b00..9d3c424 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,3 +1,4 @@ +from datetime import datetime from pathlib import Path import yaml @@ -18,7 +19,10 @@ def test_util(): assert utils.tier_path(setup) == str(testprod / "generated/tier") - assert utils.unix_time("20230101T123456Z") == 1672572896.0 + time = datetime.now() + assert int(utils.unix_time(time.strftime("%Y%m%dT%H%M%SZ"))) == int( + time.timestamp() + ) def test_filekey(): @@ -41,7 +45,7 @@ def test_filekey(): assert ( FileKey.get_filekey_from_pattern( key.get_path_from_filekey(patterns.get_pattern_tier(setup, "dsp"))[0], - utils.get_tier_path(setup, "dsp"), + patterns.get_pattern_tier(setup, "dsp"), ).name == key.name ) @@ -70,9 +74,10 @@ def test_create_pars_keylist(): "cal/p00/r000/l200-p00-r000-cal-20230101T123456Z-par_dsp.yaml", "lar/p00/r000/l200-p00-r000-lar-20230102T123456Z-par_dsp.yaml", } - keylist = sorted( - ParsKeyResolve.get_keys("-*-*-*-cal", patterns.get_pattern_tier_daq(setup)), + ParsKeyResolve.get_keys( + "-*-*-*-cal", patterns.get_pattern_tier_daq(setup, extension="*") + ), key=FileKey.get_unix_timestamp, ) assert keylist == [ From ed1586d4a1ec90d051005c6422e9fb574ec97aa4 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 4 Feb 2025 
23:21:44 +0100 Subject: [PATCH 093/101] use dashes not underscores in executable names --- pyproject.toml | 64 ++++++++++++------------- workflow/rules/ann.smk | 4 +- workflow/rules/blinding_calibration.smk | 6 +-- workflow/rules/blinding_check.smk | 6 +-- workflow/rules/chanlist_gen.smk | 2 +- workflow/rules/channel_merge.smk | 8 ++-- workflow/rules/dsp.smk | 2 +- workflow/rules/dsp_pars_geds.smk | 14 +++--- workflow/rules/evt.smk | 4 +- workflow/rules/hit.smk | 2 +- workflow/rules/hit_pars_geds.smk | 8 ++-- workflow/rules/pht.smk | 2 +- workflow/rules/pht_pars_geds.smk | 18 +++---- workflow/rules/pht_pars_geds_fast.smk | 4 +- workflow/rules/psp.smk | 2 +- workflow/rules/psp_pars_geds.smk | 8 ++-- workflow/rules/qc_phy.smk | 8 ++-- workflow/rules/raw.smk | 6 +-- workflow/rules/skm.smk | 2 +- workflow/rules/tcm.smk | 4 +- 20 files changed, 87 insertions(+), 87 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 86f7d5b..3aae00f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,38 +83,38 @@ docs = [ ] [project.scripts] -dataprod = "legenddataflow.execenv:dataprod" -create_chankeylist = "legenddataflow.scripts.create_chankeylist:create_chankeylist" -merge_channels = "legenddataflow.scripts.merge_channels:merge_channels" -build_filedb = "legenddataflow.scripts.build_filedb:build_filedb" -build_tier_dsp = "legenddataflow.scripts.tier.dsp:build_tier_dsp" -build_tier_evt = "legenddataflow.scripts.tier.evt:build_tier_evt" -build_tier_hit = "legenddataflow.scripts.tier.hit:build_tier_hit" -build_tier_raw_blind = "legenddataflow.scripts.tier.raw_blind:build_tier_raw_blind" -build_tier_raw_fcio = "legenddataflow.scripts.tier.raw_fcio:build_tier_raw_fcio" -build_tier_raw_orca = "legenddataflow.scripts.tier.raw_orca:build_tier_raw_orca" -build_tier_skm = "legenddataflow.scripts.tier.skm:build_tier_skm" -build_tier_tcm = "legenddataflow.scripts.tier.tcm:build_tier_tcm" -par_geds_dsp_dplms = "legenddataflow.scripts.par.geds.dsp.dplms:par_geds_dsp_dplms" -par_geds_dsp_eopt = "legenddataflow.scripts.par.geds.dsp.eopt:par_geds_dsp_eopt" -par_geds_dsp_evtsel = "legenddataflow.scripts.par.geds.dsp.evtsel:par_geds_dsp_evtsel" -par_geds_dsp_nopt = "legenddataflow.scripts.par.geds.dsp.nopt:par_geds_dsp_nopt" -par_geds_dsp_svm_build = "legenddataflow.scripts.par.geds.dsp.svm_build:par_geds_dsp_svm_build" -par_geds_dsp_svm = "legenddataflow.scripts.par.geds.dsp.svm:par_geds_dsp_svm" -par_geds_dsp_tau = "legenddataflow.scripts.par.geds.dsp.tau:par_geds_dsp_tau" -par_geds_hit_aoe = "legenddataflow.scripts.par.geds.hit.aoe:par_geds_hit_aoe" -par_geds_hit_ecal = "legenddataflow.scripts.par.geds.hit.ecal:par_geds_hit_ecal" -par_geds_hit_lq = "legenddataflow.scripts.par.geds.hit.lq:par_geds_hit_lq" -par_geds_hit_qc = "legenddataflow.scripts.par.geds.hit.qc:par_geds_hit_qc" -par_geds_pht_aoe = "legenddataflow.scripts.par.geds.pht.aoe:par_geds_pht_aoe" -par_geds_pht_ecal_part = "legenddataflow.scripts.par.geds.pht.ecal_part:par_geds_pht_ecal_part" -par_geds_pht_fast = "legenddataflow.scripts.par.geds.pht.fast:par_geds_pht_fast" -par_geds_pht_qc_phy = "legenddataflow.scripts.par.geds.pht.qc_phy:par_geds_pht_qc_phy" -par_geds_pht_qc = "legenddataflow.scripts.par.geds.pht.qc:par_geds_pht_qc" -par_geds_psp_average = "legenddataflow.scripts.par.geds.psp.average:par_geds_psp_average" -par_geds_raw_blindcal = "legenddataflow.scripts.par.geds.raw.blindcal:par_geds_raw_blindcal" -par_geds_raw_blindcheck = "legenddataflow.scripts.par.geds.raw.blindcheck:par_geds_raw_blindcheck" -par_geds_tcm_pulser = 
"legenddataflow.scripts.par.geds.raw.tcm.pulser:par_geds_raw_pulser" +dataprod = "legenddataflow.execenv:dataprod" +create-chankeylist = "legenddataflow.scripts.create_chankeylist:create_chankeylist" +merge-channels = "legenddataflow.scripts.merge_channels:merge_channels" +build-filedb = "legenddataflow.scripts.build_filedb:build_filedb" +build-tier-dsp = "legenddataflow.scripts.tier.dsp:build_tier_dsp" +build-tier-evt = "legenddataflow.scripts.tier.evt:build_tier_evt" +build-tier-hit = "legenddataflow.scripts.tier.hit:build_tier_hit" +build-tier-raw-blind = "legenddataflow.scripts.tier.raw_blind:build_tier_raw_blind" +build-tier-raw-fcio = "legenddataflow.scripts.tier.raw_fcio:build_tier_raw_fcio" +build-tier-raw-orca = "legenddataflow.scripts.tier.raw_orca:build_tier_raw_orca" +build-tier-skm = "legenddataflow.scripts.tier.skm:build_tier_skm" +build-tier-tcm = "legenddataflow.scripts.tier.tcm:build_tier_tcm" +par-geds-dsp-dplms = "legenddataflow.scripts.par.geds.dsp.dplms:par_geds_dsp_dplms" +par-geds-dsp-eopt = "legenddataflow.scripts.par.geds.dsp.eopt:par_geds_dsp_eopt" +par-geds-dsp-evtsel = "legenddataflow.scripts.par.geds.dsp.evtsel:par_geds_dsp_evtsel" +par-geds-dsp-nopt = "legenddataflow.scripts.par.geds.dsp.nopt:par_geds_dsp_nopt" +par-geds-dsp-svm-build = "legenddataflow.scripts.par.geds.dsp.svm_build:par_geds_dsp_svm_build" +par-geds-dsp-svm = "legenddataflow.scripts.par.geds.dsp.svm:par_geds_dsp_svm" +par-geds-dsp-tau = "legenddataflow.scripts.par.geds.dsp.tau:par_geds_dsp_tau" +par-geds-hit-aoe = "legenddataflow.scripts.par.geds.hit.aoe:par_geds_hit_aoe" +par-geds-hit-ecal = "legenddataflow.scripts.par.geds.hit.ecal:par_geds_hit_ecal" +par-geds-hit-lq = "legenddataflow.scripts.par.geds.hit.lq:par_geds_hit_lq" +par-geds-hit-qc = "legenddataflow.scripts.par.geds.hit.qc:par_geds_hit_qc" +par-geds-pht-aoe = "legenddataflow.scripts.par.geds.pht.aoe:par_geds_pht_aoe" +par-geds-pht-ecal-part = "legenddataflow.scripts.par.geds.pht.ecal_part:par_geds_pht_ecal_part" +par-geds-pht-fast = "legenddataflow.scripts.par.geds.pht.fast:par_geds_pht_fast" +par-geds-pht-qc-phy = "legenddataflow.scripts.par.geds.pht.qc_phy:par_geds_pht_qc_phy" +par-geds-pht-qc = "legenddataflow.scripts.par.geds.pht.qc:par_geds_pht_qc" +par-geds-psp-average = "legenddataflow.scripts.par.geds.psp.average:par_geds_psp_average" +par-geds-raw-blindcal = "legenddataflow.scripts.par.geds.raw.blindcal:par_geds_raw_blindcal" +par-geds-raw-blindcheck = "legenddataflow.scripts.par.geds.raw.blindcheck:par_geds_raw_blindcheck" +par-geds-tcm-pulser = "legenddataflow.scripts.par.geds.raw.tcm.pulser:par_geds_raw_pulser" [tool.uv.workspace] exclude = ["generated", "inputs", "software", "workflow"] diff --git a/workflow/rules/ann.smk b/workflow/rules/ann.smk index 8e7429f..b1f5edf 100644 --- a/workflow/rules/ann.smk +++ b/workflow/rules/ann.smk @@ -30,7 +30,7 @@ rule build_ann: runtime=300, mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: - f'{execenv_smk_py_script(config, "build_tier_dsp")}' + f'{execenv_smk_py_script(config, "build-tier-dsp")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -61,7 +61,7 @@ rule build_pan: runtime=300, mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: - f'{execenv_smk_py_script(config, "build_tier_dsp")}' + f'{execenv_smk_py_script(config, "build-tier-dsp")}' "--log {log} " "--configs {configs} " "--metadata {meta} " diff --git a/workflow/rules/blinding_calibration.smk b/workflow/rules/blinding_calibration.smk index 
1a69313..fce7b11 100644 --- a/workflow/rules/blinding_calibration.smk +++ b/workflow/rules/blinding_calibration.smk @@ -38,7 +38,7 @@ rule build_blinding_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_raw_blindcal")}' + f'{execenv_smk_py_script(config, "par-geds-raw-blindcal")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -66,7 +66,7 @@ rule build_plts_blinding: group: "merge-blindcal" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--input {input} " "--output {output} " @@ -88,6 +88,6 @@ rule build_pars_blinding: group: "merge-blindcal" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--input {input.infiles} " "--output {output} " diff --git a/workflow/rules/blinding_check.smk b/workflow/rules/blinding_check.smk index bd9b796..b5ec5b4 100644 --- a/workflow/rules/blinding_check.smk +++ b/workflow/rules/blinding_check.smk @@ -39,7 +39,7 @@ rule build_blinding_check: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_raw_blindcheck")}' + f'{execenv_smk_py_script(config, "par-geds-raw-blindcheck")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -67,7 +67,7 @@ rule build_plts_raw: group: "merge-raw" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--input {input} " "--output {output} " @@ -91,4 +91,4 @@ rule build_pars_raw: group: "merge-raw" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' diff --git a/workflow/rules/chanlist_gen.smk b/workflow/rules/chanlist_gen.smk index b6a3ea8..abee65a 100644 --- a/workflow/rules/chanlist_gen.smk +++ b/workflow/rules/chanlist_gen.smk @@ -23,7 +23,7 @@ def get_chanlist(setup, keypart, workflow, config, det_status, chan_maps): f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = execenv_smk_py_script(config, "create_chankeylist") + cmd = execenv_smk_py_script(config, "create-chankeylist") cmd += f" --det_status {det_status} --channelmap {chan_maps} --timestamp {key.timestamp} " cmd += f"--datatype cal --output_file {output_file}" os.system(cmd) diff --git a/workflow/rules/channel_merge.smk b/workflow/rules/channel_merge.smk index b970840..8ba185d 100644 --- a/workflow/rules/channel_merge.smk +++ b/workflow/rules/channel_merge.smk @@ -31,7 +31,7 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): group: f"merge-{tier}" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--input {input} " "--output {output} " "--channelmap {meta} " @@ -64,7 +64,7 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): group: f"merge-{tier}" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -97,7 +97,7 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): group: f"merge-{tier}" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -144,7 +144,7 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): 
f"merge-{tier}" run: shell_string = ( - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--output {output.out_file} " "--input {input.in_files} " "--timestamp {params.timestamp} " diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index f296716..9acf3ae 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -52,7 +52,7 @@ rule build_dsp: runtime=300, mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, shell: - f'{execenv_smk_py_script(config, "build_tier_dsp")}' + f'{execenv_smk_py_script(config, "build-tier-dsp")}' "--log {log} " "--tier dsp " f"--configs {ro(configs)} " diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index 86b8342..2dc6d47 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -36,7 +36,7 @@ rule build_pars_dsp_tau_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_tau")}' + f'{execenv_smk_py_script(config, "par-geds-dsp-tau")}' "--configs {configs} " "--log {log} " "--datatype {params.datatype} " @@ -70,7 +70,7 @@ rule build_pars_evtsel_geds: runtime=300, mem_swap=70, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_evtsel")}' + f'{execenv_smk_py_script(config, "par-geds-dsp-evtsel")}' "--configs {configs} " "--log {log} " "--datatype {params.datatype} " @@ -107,7 +107,7 @@ rule build_pars_dsp_nopt_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_nopt")}' + f'{execenv_smk_py_script(config, "par-geds-dsp-nopt")}' "--database {input.database} " "--configs {configs} " "--log {log} " @@ -144,7 +144,7 @@ rule build_pars_dsp_dplms_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_dplms")}' + f'{execenv_smk_py_script(config, "par-geds-dsp-dplms")}' "--fft_raw_filelist {input.fft_files} " "--peak_file {input.peak_file} " "--database {input.database} " @@ -182,7 +182,7 @@ rule build_pars_dsp_eopt_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_eopt")}' + f'{execenv_smk_py_script(config, "par-geds-dsp-eopt")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -213,7 +213,7 @@ rule build_svm_dsp_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_svm_build")}' + f'{execenv_smk_py_script(config, "par-geds-dsp-svm-build")}' "--log {log} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " @@ -233,7 +233,7 @@ rule build_pars_dsp_svm_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_svm")}' + f'{execenv_smk_py_script(config, "par-geds-dsp-svm")}' "--log {log} " "--input_file {input.dsp_pars} " "--output_file {output.dsp_pars} " diff --git a/workflow/rules/evt.smk b/workflow/rules/evt.smk index 1bcb2a4..260fc31 100644 --- a/workflow/rules/evt.smk +++ b/workflow/rules/evt.smk @@ -44,7 +44,7 @@ rule build_evt: mem_swap=50, run: shell_string = ( - f'{execenv_smk_py_script(config, "build_tier_evt")}' + f'{execenv_smk_py_script(config, "build-tier-evt")}' f"--configs {ro(configs)} " f"--metadata {ro(meta)} " "--log {log} " @@ -96,7 +96,7 @@ rule build_pet: mem_swap=50, run: shell_string = ( - f'{execenv_smk_py_script(config, "build_tier_evt")}' + f'{execenv_smk_py_script(config, "build-tier-evt")}' f"--configs {ro(configs)} " f"--metadata {ro(meta)} " "--log {log} " diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index 5d83174..a6cf3c0 100644 --- 
a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -50,7 +50,7 @@ rule build_hit: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "build_tier_hit")}' + f'{execenv_smk_py_script(config, "build-tier-hit")}' f"--configs {ro(configs)} " "--metadata {meta} " "--log {log} " diff --git a/workflow/rules/hit_pars_geds.smk b/workflow/rules/hit_pars_geds.smk index 8143f82..7db1fcc 100644 --- a/workflow/rules/hit_pars_geds.smk +++ b/workflow/rules/hit_pars_geds.smk @@ -46,7 +46,7 @@ rule build_qc: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_hit_qc")}' + f'{execenv_smk_py_script(config, "par-geds-hit-qc")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -94,7 +94,7 @@ rule build_energy_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' + f'{execenv_smk_py_script(config, "par-geds-hit-ecal")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -142,7 +142,7 @@ rule build_aoe_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_hit_aoe")}' + f'{execenv_smk_py_script(config, "par-geds-hit-aoe")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -188,7 +188,7 @@ rule build_lq_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_hit_lq")}' + f'{execenv_smk_py_script(config, "par-geds-hit-lq")}' "--log {log} " "--configs {configs} " "--metadata {meta} " diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index fa85971..402ab8d 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -53,7 +53,7 @@ rule build_pht: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "build_tier_hit")}' + f'{execenv_smk_py_script(config, "build-tier-hit")}' f"--configs {ro(configs)} " "--metadata {meta} " "--log {log} " diff --git a/workflow/rules/pht_pars_geds.smk b/workflow/rules/pht_pars_geds.smk index 4e5e126..50b6972 100644 --- a/workflow/rules/pht_pars_geds.smk +++ b/workflow/rules/pht_pars_geds.smk @@ -101,7 +101,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 30, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' + f'{execenv_smk_py_script(config, "par-geds-pht-qc")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -152,7 +152,7 @@ rule build_pht_qc: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_qc")}' + f'{execenv_smk_py_script(config, "par-geds-pht-qc")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -213,7 +213,7 @@ rule build_per_energy_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_hit_ecal")}' + f'{execenv_smk_py_script(config, "par-geds-hit-ecal")}' "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -325,7 +325,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' + f'{execenv_smk_py_script(config, "par-geds-pht-ecal-part")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -385,7 +385,7 @@ rule build_pht_energy_super_calibrations: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_ecal_part")}' + f'{execenv_smk_py_script(config, "par-geds-pht-ecal-part")}' "--log {log} " "--configs {configs} " "--datatype 
{params.datatype} " @@ -506,7 +506,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' + f'{execenv_smk_py_script(config, "par-geds-pht-aoe")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -566,7 +566,7 @@ rule build_pht_aoe_calibrations: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_aoe")}' + f'{execenv_smk_py_script(config, "par-geds-pht-aoe")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -685,7 +685,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' + f'{execenv_smk_py_script(config, "par-geds-pht-lq")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -740,7 +740,7 @@ rule build_pht_lq_calibration: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_lq")}' + f'{execenv_smk_py_script(config, "par-geds-pht-lq")}' "--log {log} " "--configs {configs} " "--metadata {meta} " diff --git a/workflow/rules/pht_pars_geds_fast.smk b/workflow/rules/pht_pars_geds_fast.smk index 2379753..26aca7e 100644 --- a/workflow/rules/pht_pars_geds_fast.smk +++ b/workflow/rules/pht_pars_geds_fast.smk @@ -106,7 +106,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 12, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_fast")}' + f'{execenv_smk_py_script(config, "par-geds-pht-fast")}' "--log {log} " "--configs {configs} " "--metadata {meta} " @@ -164,7 +164,7 @@ rule par_pht_fast: mem_swap=50, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_fast")}' + f'{execenv_smk_py_script(config, "par-geds-pht-fast")}' "--log {log} " "--configs {configs} " "--metadata {meta} " diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index e264ca4..1f6d36f 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -52,7 +52,7 @@ rule build_psp: runtime=300, mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, shell: - f'{execenv_smk_py_script(config, "build_tier_dsp")}' + f'{execenv_smk_py_script(config, "build-tier-dsp")}' "--log {log} " "--tier psp " f"--configs {ro(configs)} " diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk index 8d53220..8f6ee77 100644 --- a/workflow/rules/psp_pars_geds.smk +++ b/workflow/rules/psp_pars_geds.smk @@ -97,7 +97,7 @@ for key, dataset in part.datasets.items(): resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_psp_average")}' + f'{execenv_smk_py_script(config, "par-geds-psp-average")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -142,7 +142,7 @@ rule build_par_psp_fallback: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_psp_average")}' + f'{execenv_smk_py_script(config, "par-geds-psp-average")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -183,7 +183,7 @@ rule build_svm_psp: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_svm_build")}' + f'{execenv_smk_py_script(config, "par-geds-dsp-svm-build")}' "--log {log} " "--train_data {input.train_data} " "--train_hyperpars {input.hyperpars} " @@ -203,7 +203,7 @@ rule build_pars_psp_svm: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_dsp_svm")}' + 
f'{execenv_smk_py_script(config, "par-geds-dsp-svm")}' "--log {log} " "--input_file {input.dsp_pars} " "--output_file {output.dsp_pars} " diff --git a/workflow/rules/qc_phy.smk b/workflow/rules/qc_phy.smk index a5cd954..8d6250e 100644 --- a/workflow/rules/qc_phy.smk +++ b/workflow/rules/qc_phy.smk @@ -67,7 +67,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 20, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_qc_phy")}' + f'{execenv_smk_py_script(config, "par-geds-pht-qc-phy")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -108,7 +108,7 @@ rule build_pht_qc_phy: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_pht_qc_phy")}' + f'{execenv_smk_py_script(config, "par-geds-pht-qc-phy")}' "--log {log} " "--configs {configs} " "--datatype {params.datatype} " @@ -146,7 +146,7 @@ rule build_plts_pht_phy: group: "merge-hit" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--input {input} " "--output {output} " @@ -168,6 +168,6 @@ rule build_pars_pht_phy: group: "merge-hit" shell: - f'{execenv_smk_py_script(config, "merge_channels")}' + f'{execenv_smk_py_script(config, "merge-channels")}' "--input {input.infiles} " "--output {output} " diff --git a/workflow/rules/raw.smk b/workflow/rules/raw.smk index b0040fd..2411c14 100644 --- a/workflow/rules/raw.smk +++ b/workflow/rules/raw.smk @@ -40,7 +40,7 @@ rule build_raw_orca: mem_swap=110, runtime=300, shell: - f'{execenv_smk_py_script(config, "build_tier_raw_orca")}' + f'{execenv_smk_py_script(config, "build-tier-raw-orca")}' "--log {log} " f"--configs {ro(configs)} " f"--chan_maps {ro(chan_maps)} " @@ -69,7 +69,7 @@ rule build_raw_fcio: mem_swap=110, runtime=300, shell: - f'{execenv_smk_py_script(config, "build_tier_raw_fcio")}' + f'{execenv_smk_py_script(config, "build-tier-raw-fcio")}' "--log {log} " f"--configs {ro(configs)} " f"--chan_maps {ro(chan_maps)} " @@ -104,7 +104,7 @@ rule build_raw_blind: mem_swap=110, runtime=300, shell: - f'{execenv_smk_py_script(config, "build_tier_raw_blind")}' + f'{execenv_smk_py_script(config, "build-tier-raw-blind")}' "--log {log} " f"--configs {ro(configs)} " f"--chan_maps {ro(chan_maps)} " diff --git a/workflow/rules/skm.smk b/workflow/rules/skm.smk index 7a4a686..3f38c3b 100644 --- a/workflow/rules/skm.smk +++ b/workflow/rules/skm.smk @@ -27,7 +27,7 @@ rule build_skm: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "build_tier_skm")}' + f'{execenv_smk_py_script(config, "build-tier-skm")}' f"--configs {ro(configs)} " "--timestamp {params.timestamp} " "--log {log} " diff --git a/workflow/rules/tcm.smk b/workflow/rules/tcm.smk index afb080c..b954bf3 100644 --- a/workflow/rules/tcm.smk +++ b/workflow/rules/tcm.smk @@ -29,7 +29,7 @@ rule build_tier_tcm: runtime=300, mem_swap=20, shell: - f'{execenv_smk_py_script(config, "build_tier_tcm")}' + f'{execenv_smk_py_script(config, "build-tier-tcm")}' "--log {log} " f"--configs {ro(configs)} " "--datatype {params.datatype} " @@ -57,7 +57,7 @@ rule build_pulser_ids: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par_geds_tcm_pulser")}' + f'{execenv_smk_py_script(config, "par-geds-tcm-pulser")}' "--log {log} " f"--configs {ro(configs)} " "--datatype {params.datatype} " From 16d03a2e798480e21cb314effacab17cc2f438a1 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 4 Feb 2025 23:37:23 +0100 Subject: [PATCH 094/101] replace underscores 
 with dashes in cmdline options
---
 workflow/rules/ann.smk                        |   8 +-
 workflow/rules/blinding_calibration.smk       |   4 +-
 workflow/rules/blinding_check.smk             |   4 +-
 workflow/rules/chanlist_gen.smk               |   4 +-
 workflow/rules/channel_merge.smk              |   4 +-
 workflow/rules/dsp.smk                        |   4 +-
 workflow/rules/dsp_pars_geds.smk              |  56 ++++----
 workflow/rules/evt.smk                        |  24 ++--
 workflow/rules/hit.smk                        |   4 +-
 workflow/rules/hit_pars_geds.smk              |  50 +++----
 workflow/rules/pht.smk                        |   4 +-
 workflow/rules/pht_pars_geds.smk              | 122 +++++++++---------
 workflow/rules/pht_pars_geds_fast.smk         |  28 ++--
 workflow/rules/psp.smk                        |   4 +-
 workflow/rules/psp_pars_geds.smk              |  28 ++--
 workflow/rules/qc_phy.smk                     |  12 +-
 workflow/rules/raw.smk                        |   8 +-
 workflow/rules/skm.smk                        |   2 +-
 workflow/rules/tcm.smk                        |   4 +-
 .../scripts/create_chankeylist.py             |   4 +-
 .../legenddataflow/scripts/merge_channels.py  |   4 +-
 .../scripts/par/geds/dsp/dplms.py             |  10 +-
 .../scripts/par/geds/dsp/eopt.py              |   8 +-
 .../scripts/par/geds/dsp/evtsel.py            |  10 +-
 .../scripts/par/geds/dsp/nopt.py              |   6 +-
 .../scripts/par/geds/dsp/svm.py               |   4 +-
 .../scripts/par/geds/dsp/svm_build.py         |   6 +-
 .../scripts/par/geds/dsp/tau.py               |  10 +-
 .../scripts/par/geds/hit/aoe.py               |  14 +-
 .../scripts/par/geds/hit/ecal.py              |  16 +--
 .../legenddataflow/scripts/par/geds/hit/lq.py |  14 +-
 .../legenddataflow/scripts/par/geds/hit/qc.py |  14 +-
 .../scripts/par/geds/pht/aoe.py               |  16 +--
 .../scripts/par/geds/pht/ecal_part.py         |  16 +--
 .../scripts/par/geds/pht/fast.py              |  16 +--
 .../legenddataflow/scripts/par/geds/pht/lq.py |  16 +--
 .../legenddataflow/scripts/par/geds/pht/qc.py |  14 +-
 .../scripts/par/geds/pht/qc_phy.py            |   6 +-
 .../scripts/par/geds/psp/average.py           |   8 +-
 .../scripts/par/geds/raw/blindcal.py          |   4 +-
 .../scripts/par/geds/raw/blindcheck.py        |   4 +-
 .../scripts/par/geds/tcm/pulser.py            |   4 +-
 .../src/legenddataflow/scripts/tier/dsp.py    |   4 +-
 .../src/legenddataflow/scripts/tier/evt.py    |  12 +-
 .../src/legenddataflow/scripts/tier/hit.py    |   4 +-
 .../legenddataflow/scripts/tier/raw_blind.py  |   4 +-
 .../legenddataflow/scripts/tier/raw_fcio.py   |   2 +-
 .../legenddataflow/scripts/tier/raw_orca.py   |   2 +-
 .../src/legenddataflow/scripts/tier/skm.py    |   2 +-
 49 files changed, 314 insertions(+), 314 deletions(-)

diff --git a/workflow/rules/ann.smk b/workflow/rules/ann.smk
index b1f5edf..5cdd016 100644
--- a/workflow/rules/ann.smk
+++ b/workflow/rules/ann.smk
@@ -39,8 +39,8 @@ rule build_ann:
         "--timestamp {params.timestamp} "
         "--input {input.dsp_file} "
         "--output {output.tier_file} "
-        "--db_file {output.db_file} "
-        "--pars_file {input.pars_file} "
+        "--db-file {output.db_file} "
+        "--pars-file {input.pars_file} "


 rule build_pan:
@@ -70,5 +70,5 @@ rule build_pan:
         "--timestamp {params.timestamp} "
         "--input {input.dsp_file} "
         "--output {output.tier_file} "
-        "--db_file {output.db_file} "
-        "--pars_file {input.pars_file} "
+        "--db-file {output.db_file} "
+        "--pars-file {input.pars_file} "
diff --git a/workflow/rules/blinding_calibration.smk b/workflow/rules/blinding_calibration.smk
index fce7b11..31e71a8 100644
--- a/workflow/rules/blinding_calibration.smk
+++ b/workflow/rules/blinding_calibration.smk
@@ -45,8 +45,8 @@ rule build_blinding_calibration:
         "--channel {params.channel} "
         "--configs {configs} "
         "--meta {params.meta} "
-        "--plot_file {output.plot_file} "
-        "--blind_curve {output.par_file} "
+        "--plot-file {output.plot_file} "
+        "--blind-curve {output.par_file} "
         "--files {input.files} "


diff --git a/workflow/rules/blinding_check.smk b/workflow/rules/blinding_check.smk
index b5ec5b4..2bee385 100644
--- a/workflow/rules/blinding_check.smk
+++
b/workflow/rules/blinding_check.smk @@ -47,8 +47,8 @@ rule build_blinding_check: "--configs {configs} " "--metadata {meta} " "--output {output.check_file} " - "--blind_curve {input.par_file} " - "--plot_file {output.plot_file} " + "--blind-curve {input.par_file} " + "--plot-file {output.plot_file} " "--files {input.files} " diff --git a/workflow/rules/chanlist_gen.smk b/workflow/rules/chanlist_gen.smk index abee65a..750104b 100644 --- a/workflow/rules/chanlist_gen.smk +++ b/workflow/rules/chanlist_gen.smk @@ -24,8 +24,8 @@ def get_chanlist(setup, keypart, workflow, config, det_status, chan_maps): ) cmd = execenv_smk_py_script(config, "create-chankeylist") - cmd += f" --det_status {det_status} --channelmap {chan_maps} --timestamp {key.timestamp} " - cmd += f"--datatype cal --output_file {output_file}" + cmd += f" --det-status {det_status} --channelmap {chan_maps} --timestamp {key.timestamp} " + cmd += f"--datatype cal --output-file {output_file}" os.system(cmd) with open(output_file) as r: diff --git a/workflow/rules/channel_merge.smk b/workflow/rules/channel_merge.smk index 8ba185d..b221fc3 100644 --- a/workflow/rules/channel_merge.smk +++ b/workflow/rules/channel_merge.smk @@ -152,8 +152,8 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): ) if lh5_merge is True: shell_string += ( - "--in_db {input.in_db} " - "--out_db {output.out_db} " + "--in-db {input.in_db} " + "--out-db {output.out_db} " ) shell(shell_string) diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index 9acf3ae..20c5d38 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -61,5 +61,5 @@ rule build_dsp: "--timestamp {params.timestamp} " "--input {params.ro_input[raw_file]} " "--output {output.tier_file} " - "--db_file {output.db_file} " - "--pars_file {params.ro_input[pars_file]} " + "--db-file {output.db_file} " + "--pars-file {params.ro_input[pars_file]} " diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index 2dc6d47..8d1f075 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -42,10 +42,10 @@ rule build_pars_dsp_tau_geds: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--plot_path {output.plots} " - "--output_file {output.decay_const} " - "--pulser_file {input.pulser} " - "--raw_files {input.files}" + "--plot-path {output.plots} " + "--output-file {output.decay_const} " + "--pulser-file {input.pulser} " + "--raw-files {input.files}" rule build_pars_evtsel_geds: @@ -76,11 +76,11 @@ rule build_pars_evtsel_geds: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--peak_file {output.peak_file} " - "--pulser_file {input.pulser_file} " - "--decay_const {input.database} " - "--raw_cal {input.raw_cal} " - "--raw_filelist {input.files}" + "--peak-file {output.peak_file} " + "--pulser-file {input.pulser_file} " + "--decay-const {input.database} " + "--raw-cal {input.raw_cal} " + "--raw-filelist {input.files}" # This rule builds the optimal energy filter parameters for the dsp using fft files @@ -115,9 +115,9 @@ rule build_pars_dsp_nopt_geds: "--timestamp {params.timestamp} " "--channel {params.channel} " "--inplots {input.inplots} " - "--plot_path {output.plots} " - "--dsp_pars {output.dsp_pars_nopt} " - "--raw_filelist {input.files}" + "--plot-path {output.plots} " + "--dsp-pars {output.dsp_pars_nopt} " + "--raw-filelist {input.files}" # This rule builds the dplms energy filter for the dsp using fft and cal files @@ -145,8 
+145,8 @@ rule build_pars_dsp_dplms_geds: runtime=300, shell: f'{execenv_smk_py_script(config, "par-geds-dsp-dplms")}' - "--fft_raw_filelist {input.fft_files} " - "--peak_file {input.peak_file} " + "--fft-raw-filelist {input.fft_files} " + "--peak-file {input.peak_file} " "--database {input.database} " "--inplots {input.inplots} " "--configs {configs} " @@ -154,9 +154,9 @@ rule build_pars_dsp_dplms_geds: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--dsp_pars {output.dsp_pars} " - "--lh5_path {output.lh5_path} " - "--plot_path {output.plots} " + "--dsp-pars {output.dsp_pars} " + "--lh5-path {output.lh5_path} " + "--plot-path {output.plots} " # This rule builds the optimal energy filter parameters for the dsp using calibration dsp files @@ -188,12 +188,12 @@ rule build_pars_dsp_eopt_geds: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--peak_file {input.peak_file} " + "--peak-file {input.peak_file} " "--inplots {input.inplots} " - "--decay_const {input.decay_const} " - "--plot_path {output.plots} " - "--qbb_grid_path {output.qbb_grid} " - "--final_dsp_pars {output.dsp_pars}" + "--decay-const {input.decay_const} " + "--plot-path {output.plots} " + "--qbb-grid-path {output.qbb_grid} " + "--final-dsp-pars {output.dsp_pars}" rule build_svm_dsp_geds: @@ -215,9 +215,9 @@ rule build_svm_dsp_geds: shell: f'{execenv_smk_py_script(config, "par-geds-dsp-svm-build")}' "--log {log} " - "--train_data {input.train_data} " - "--train_hyperpars {input.hyperpars} " - "--output_file {output.dsp_pars}" + "--train-data {input.train_data} " + "--train-hyperpars {input.hyperpars} " + "--output-file {output.dsp_pars}" rule build_pars_dsp_svm_geds: @@ -235,6 +235,6 @@ rule build_pars_dsp_svm_geds: shell: f'{execenv_smk_py_script(config, "par-geds-dsp-svm")}' "--log {log} " - "--input_file {input.dsp_pars} " - "--output_file {output.dsp_pars} " - "--svm_file {input.svm_file}" + "--input-file {input.dsp_pars} " + "--output-file {output.dsp_pars} " + "--svm-file {input.svm_file}" diff --git a/workflow/rules/evt.smk b/workflow/rules/evt.smk index 260fc31..cc72249 100644 --- a/workflow/rules/evt.smk +++ b/workflow/rules/evt.smk @@ -51,15 +51,15 @@ rule build_evt: "--tier {params.tier} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--xtc_file {params.ro_input[xtalk_matrix]} " - "--par_files {params.ro_input[par_files]} " - "--hit_file {params.ro_input[hit_file]} " - "--tcm_file {params.ro_input[tcm_file]} " - "--dsp_file {params.ro_input[dsp_file]} " + "--xtc-file {params.ro_input[xtalk_matrix]} " + "--par-files {params.ro_input[par_files]} " + "--hit-file {params.ro_input[hit_file]} " + "--tcm-file {params.ro_input[tcm_file]} " + "--dsp-file {params.ro_input[dsp_file]} " "--output {output} " ) if input.ann_file is not None: - shell_string += "--ann_file {params.ro_input[ann_file]} " + shell_string += "--ann-file {params.ro_input[ann_file]} " shell(shell_string) @@ -103,15 +103,15 @@ rule build_pet: "--tier {params.tier} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--xtc_file {params.ro_input[xtalk_matrix]} " - "--par_files {params.ro_input[par_files]} " - "--hit_file {params.ro_input[hit_file]} " - "--tcm_file {params.ro_input[tcm_file]} " - "--dsp_file {params.ro_input[dsp_file]} " + "--xtc-file {params.ro_input[xtalk_matrix]} " + "--par-files {params.ro_input[par_files]} " + "--hit-file {params.ro_input[hit_file]} " + "--tcm-file {params.ro_input[tcm_file]} " + 
"--dsp-file {params.ro_input[dsp_file]} " "--output {output} " ) if input.ann_file is not None: - shell_string += "--ann_file {params.ro_input[ann_file]} " + shell_string += "--ann-file {params.ro_input[ann_file]} " shell(shell_string) diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index a6cf3c0..469b0f5 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -57,7 +57,7 @@ rule build_hit: "--tier {params.tier} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--pars_file {params.ro_input[pars_file]} " + "--pars-file {params.ro_input[pars_file]} " "--output {output.tier_file} " "--input {params.ro_input[dsp_file]} " - "--db_file {output.db_file}" + "--db-file {output.db_file}" diff --git a/workflow/rules/hit_pars_geds.smk b/workflow/rules/hit_pars_geds.smk index 7db1fcc..0b0aef6 100644 --- a/workflow/rules/hit_pars_geds.smk +++ b/workflow/rules/hit_pars_geds.smk @@ -53,12 +53,12 @@ rule build_qc: "--channel {params.channel} " "--configs {configs} " "--metadata {meta} " - "--plot_path {output.plot_file} " - "--save_path {output.qc_file} " - "--pulser_file {input.pulser} " - "--cal_files {input.files} " - "--fft_files {input.fft_files} " - "--overwrite_files {input.overwrite_files} " + "--plot-path {output.plot_file} " + "--save-path {output.qc_file} " + "--pulser-file {input.pulser} " + "--cal-files {input.files} " + "--fft-files {input.fft_files} " + "--overwrite-files {input.overwrite_files} " # This rule builds the energy calibration using the calibration dsp files @@ -101,13 +101,13 @@ rule build_energy_calibration: "--channel {params.channel} " "--configs {configs} " "--metadata {meta} " - "--plot_path {output.plot_file} " - "--results_path {output.results_file} " - "--save_path {output.ecal_file} " - "--inplot_dict {input.inplots} " - "--in_hit_dict {input.in_hit_dict} " - "--ctc_dict {input.ctc_dict} " - "--pulser_file {input.pulser} " + "--plot-path {output.plot_file} " + "--results-path {output.results_file} " + "--save-path {output.ecal_file} " + "--inplot-dict {input.inplots} " + "--in-hit-dict {input.in_hit_dict} " + "--ctc-dict {input.ctc_dict} " + "--pulser-file {input.pulser} " "--files {input.files}" @@ -150,12 +150,12 @@ rule build_aoe_calibration: "--timestamp {params.timestamp} " "--inplots {input.inplots} " "--channel {params.channel} " - "--aoe_results {output.aoe_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--pulser_file {input.pulser} " - "--ecal_file {input.ecal_file} " + "--aoe-results {output.aoe_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--pulser-file {input.pulser} " + "--ecal-file {input.ecal_file} " "{input.files}" @@ -196,10 +196,10 @@ rule build_lq_calibration: "--timestamp {params.timestamp} " "--inplots {input.inplots} " "--channel {params.channel} " - "--lq_results {output.lq_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--pulser_file {input.pulser} " - "--ecal_file {input.ecal_file} " + "--lq-results {output.lq_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--pulser-file {input.pulser} " + "--ecal-file {input.ecal_file} " "{input.files}" diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index 402ab8d..447cee0 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -60,7 +60,7 @@ rule build_pht: 
"--tier {params.tier} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--pars_file {params.ro_input[pars_file]} " + "--pars-file {params.ro_input[pars_file]} " "--output {output.tier_file} " "--input {params.ro_input[dsp_file]} " - "--db_file {output.db_file}" + "--db-file {output.db_file}" diff --git a/workflow/rules/pht_pars_geds.smk b/workflow/rules/pht_pars_geds.smk index 50b6972..cec57b5 100644 --- a/workflow/rules/pht_pars_geds.smk +++ b/workflow/rules/pht_pars_geds.smk @@ -108,12 +108,12 @@ for key, dataset in part.datasets.items(): "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--save_path {output.hit_pars} " - "--plot_path {output.plot_file} " - "--overwrite_files {input.overwrite_files} " - "--pulser_files {input.pulser_files} " - "--fft_files {input.fft_files} " - "--cal_files {input.cal_files}" + "--save-path {output.hit_pars} " + "--plot-path {output.plot_file} " + "--overwrite-files {input.overwrite_files} " + "--pulser-files {input.pulser_files} " + "--fft-files {input.fft_files} " + "--cal-files {input.cal_files}" set_last_rule_name(workflow, f"{key}-{partition}-build_pht_qc") @@ -159,12 +159,12 @@ rule build_pht_qc: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--save_path {output.hit_pars} " - "--plot_path {output.plot_file} " - "--overwrite_files {input.overwrite_files} " - "--pulser_files {input.pulser_files} " - "--fft_files {input.fft_files} " - "--cal_files {input.cal_files}" + "--save-path {output.hit_pars} " + "--plot-path {output.plot_file} " + "--overwrite-files {input.overwrite_files} " + "--pulser-files {input.pulser_files} " + "--fft-files {input.fft_files} " + "--cal-files {input.cal_files}" fallback_qc_rule = list(workflow.rules)[-1] @@ -221,13 +221,13 @@ rule build_per_energy_calibration: "--configs {configs} " "--tier {params.tier} " "--metadata {meta} " - "--plot_path {output.plot_file} " - "--results_path {output.results_file} " - "--save_path {output.ecal_file} " - "--inplot_dict {input.inplots} " - "--in_hit_dict {input.pht_dict} " - "--ctc_dict {input.ctc_dict} " - "--pulser_file {input.pulser} " + "--plot-path {output.plot_file} " + "--results-path {output.results_file} " + "--save-path {output.ecal_file} " + "--inplot-dict {input.inplots} " + "--in-hit-dict {input.pht_dict} " + "--ctc-dict {input.ctc_dict} " + "--pulser-file {input.pulser} " "--files {input.files}" @@ -333,13 +333,13 @@ for key, dataset in part.datasets.items(): "--inplots {input.inplots} " "--channel {params.channel} " "--metadata {meta} " - "--fit_results {output.partcal_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" + "--fit-results {output.partcal_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--ecal-file {input.ecal_file} " + "--pulser-files {input.pulser_files} " + "--input-files {input.files}" set_last_rule_name( workflow, f"{key}-{partition}-build_pht_energy_super_calibrations" @@ -393,13 +393,13 @@ rule build_pht_energy_super_calibrations: "--channel {params.channel} " "--metadata {meta} " "--inplots {input.inplots} " - "--fit_results {output.partcal_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - 
"--pulser_files {input.pulser_files} " - "--input_files {input.files}" + "--fit-results {output.partcal_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--ecal-file {input.ecal_file} " + "--pulser-files {input.pulser_files} " + "--input-files {input.files}" fallback_pht_rule = list(workflow.rules)[-1] @@ -514,13 +514,13 @@ for key, dataset in part.datasets.items(): "--timestamp {params.timestamp} " "--inplots {input.inplots} " "--channel {params.channel} " - "--aoe_results {output.aoe_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" + "--aoe-results {output.aoe_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--ecal-file {input.ecal_file} " + "--pulser-files {input.pulser_files} " + "--input-files {input.files}" set_last_rule_name( workflow, f"{key}-{partition}-build_pht_aoe_calibrations" @@ -574,13 +574,13 @@ rule build_pht_aoe_calibrations: "--timestamp {params.timestamp} " "--inplots {input.inplots} " "--channel {params.channel} " - "--aoe_results {output.aoe_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" + "--aoe-results {output.aoe_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--ecal-file {input.ecal_file} " + "--pulser-files {input.pulser_files} " + "--input-files {input.files}" fallback_pht_rule = list(workflow.rules)[-1] @@ -693,13 +693,13 @@ for key, dataset in part.datasets.items(): "--timestamp {params.timestamp} " "--inplots {input.inplots} " "--channel {params.channel} " - "--lq_results {output.lq_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" + "--lq-results {output.lq_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--ecal-file {input.ecal_file} " + "--pulser-files {input.pulser_files} " + "--input-files {input.files}" set_last_rule_name(workflow, f"{key}-{partition}-build_pht_lq_calibration") @@ -748,13 +748,13 @@ rule build_pht_lq_calibration: "--timestamp {params.timestamp} " "--inplots {input.inplots} " "--channel {params.channel} " - "--lq_results {output.lq_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" + "--lq-results {output.lq_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--ecal-file {input.ecal_file} " + "--pulser-files {input.pulser_files} " + "--input-files {input.files}" fallback_pht_rule = list(workflow.rules)[-1] diff --git a/workflow/rules/pht_pars_geds_fast.smk b/workflow/rules/pht_pars_geds_fast.smk index 26aca7e..c6e0232 100644 --- a/workflow/rules/pht_pars_geds_fast.smk +++ b/workflow/rules/pht_pars_geds_fast.smk @@ -115,13 +115,13 @@ for key, dataset in part.datasets.items(): "--inplots {input.inplots} " 
"--channel {params.channel} " "--metadata {meta} " - "--fit_results {output.partcal_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" + "--fit-results {output.partcal_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--ecal-file {input.ecal_file} " + "--pulser-files {input.pulser_files} " + "--input-files {input.files}" set_last_rule_name(workflow, f"{key}-{partition}-par_pht_fast") slow_rule = workflow._rules[f"{key}-{partition}-build_pht_lq_calibration"] @@ -173,13 +173,13 @@ rule par_pht_fast: "--channel {params.channel} " "--metadata {meta} " "--inplots {input.inplots} " - "--fit_results {output.partcal_results} " - "--eres_file {input.eres_file} " - "--hit_pars {output.hit_pars} " - "--plot_file {output.plot_file} " - "--ecal_file {input.ecal_file} " - "--pulser_files {input.pulser_files} " - "--input_files {input.files}" + "--fit-results {output.partcal_results} " + "--eres-file {input.eres_file} " + "--hit-pars {output.hit_pars} " + "--plot-file {output.plot_file} " + "--ecal-file {input.ecal_file} " + "--pulser-files {input.pulser_files} " + "--input-files {input.files}" fallback_pht_rule = list(workflow.rules)[-1] diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index 1f6d36f..7cceea1 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -61,5 +61,5 @@ rule build_psp: "--timestamp {params.timestamp} " "--input {params.ro_input[raw_file]} " "--output {output.tier_file} " - "--db_file {output.db_file} " - "--pars_file {params.ro_input[pars_file]} " + "--db-file {output.db_file} " + "--pars-file {params.ro_input[pars_file]} " diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk index 8f6ee77..37c0836 100644 --- a/workflow/rules/psp_pars_geds.smk +++ b/workflow/rules/psp_pars_geds.smk @@ -103,10 +103,10 @@ for key, dataset in part.datasets.items(): "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--in_plots {input.dsp_plots} " - "--out_plots {output.psp_plots} " - "--in_obj {input.dsp_objs} " - "--out_obj {output.psp_objs} " + "--in-plots {input.dsp_plots} " + "--out-plots {output.psp_plots} " + "--in-obj {input.dsp_objs} " + "--out-obj {output.psp_objs} " "--input {input.dsp_pars} " "--output {output.psp_pars} " @@ -148,10 +148,10 @@ rule build_par_psp_fallback: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--in_plots {input.dsp_plots} " - "--out_plots {output.psp_plots} " - "--in_obj {input.dsp_objs} " - "--out_obj {output.psp_objs} " + "--in-plots {input.dsp_plots} " + "--out-plots {output.psp_plots} " + "--in-obj {input.dsp_objs} " + "--out-obj {output.psp_objs} " "--input {input.dsp_pars} " "--output {output.psp_pars} " @@ -185,9 +185,9 @@ rule build_svm_psp: shell: f'{execenv_smk_py_script(config, "par-geds-dsp-svm-build")}' "--log {log} " - "--train_data {input.train_data} " - "--train_hyperpars {input.hyperpars} " - "--output_file {output.dsp_pars}" + "--train-data {input.train_data} " + "--train-hyperpars {input.hyperpars} " + "--output-file {output.dsp_pars}" rule build_pars_psp_svm: @@ -205,6 +205,6 @@ rule build_pars_psp_svm: shell: f'{execenv_smk_py_script(config, "par-geds-dsp-svm")}' "--log {log} " - "--input_file {input.dsp_pars} " - "--output_file {output.dsp_pars} " - 
"--svm_file {input.svm_model}" + "--input-file {input.dsp_pars} " + "--output-file {output.dsp_pars} " + "--svm-file {input.svm_model}" diff --git a/workflow/rules/qc_phy.smk b/workflow/rules/qc_phy.smk index 8d6250e..7ee105f 100644 --- a/workflow/rules/qc_phy.smk +++ b/workflow/rules/qc_phy.smk @@ -73,9 +73,9 @@ for key, dataset in part.datasets.items(): "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--save_path {output.hit_pars} " - "--plot_path {output.plot_file} " - "--phy_files {input.phy_files}" + "--save-path {output.hit_pars} " + "--plot-path {output.plot_file} " + "--phy-files {input.phy_files}" set_last_rule_name(workflow, f"{key}-{partition}-build_pht_qc_phy") @@ -114,9 +114,9 @@ rule build_pht_qc_phy: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--save_path {output.hit_pars} " - "--plot_path {output.plot_file} " - "--phy_files {input.phy_files}" + "--save-path {output.hit_pars} " + "--plot-path {output.plot_file} " + "--phy-files {input.phy_files}" fallback_qc_rule = list(workflow.rules)[-1] diff --git a/workflow/rules/raw.smk b/workflow/rules/raw.smk index 2411c14..9353826 100644 --- a/workflow/rules/raw.smk +++ b/workflow/rules/raw.smk @@ -43,7 +43,7 @@ rule build_raw_orca: f'{execenv_smk_py_script(config, "build-tier-raw-orca")}' "--log {log} " f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " + f"--chan-maps {ro(chan_maps)} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "{params.ro_input} {output}" @@ -72,7 +72,7 @@ rule build_raw_fcio: f'{execenv_smk_py_script(config, "build-tier-raw-fcio")}' "--log {log} " f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " + f"--chan-maps {ro(chan_maps)} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "{params.ro_input} {output}" @@ -107,10 +107,10 @@ rule build_raw_blind: f'{execenv_smk_py_script(config, "build-tier-raw-blind")}' "--log {log} " f"--configs {ro(configs)} " - f"--chan_maps {ro(chan_maps)} " + f"--chan-maps {ro(chan_maps)} " f"--metadata {ro(meta)} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " - "--blind_curve {params.ro_input[blind_file]} " + "--blind-curve {params.ro_input[blind_file]} " "--input {params.ro_input[tier_file]} " "--output {output}" diff --git a/workflow/rules/skm.smk b/workflow/rules/skm.smk index 3f38c3b..a2dc119 100644 --- a/workflow/rules/skm.smk +++ b/workflow/rules/skm.smk @@ -32,5 +32,5 @@ rule build_skm: "--timestamp {params.timestamp} " "--log {log} " "--datatype {params.datatype} " - "--evt_file {params.ro_input} " + "--evt-file {params.ro_input} " "--output {output} " diff --git a/workflow/rules/tcm.smk b/workflow/rules/tcm.smk index b954bf3..ff4e89a 100644 --- a/workflow/rules/tcm.smk +++ b/workflow/rules/tcm.smk @@ -63,6 +63,6 @@ rule build_pulser_ids: "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " - "--tcm_files {params.input} " - "--pulser_file {output.pulser} " + "--tcm-files {params.input} " + "--pulser-file {output.pulser} " "--metadata {meta} " diff --git a/workflow/src/legenddataflow/scripts/create_chankeylist.py b/workflow/src/legenddataflow/scripts/create_chankeylist.py index 9566068..710b6cc 100644 --- a/workflow/src/legenddataflow/scripts/create_chankeylist.py +++ b/workflow/src/legenddataflow/scripts/create_chankeylist.py @@ -7,12 +7,12 @@ def create_chankeylist() -> None: argparser = argparse.ArgumentParser() - 
argparser.add_argument("--det_status", help="det_status", type=str, required=True) + argparser.add_argument("--det-status", help="det_status", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channelmap", help="Channel Map", type=str, required=True) - argparser.add_argument("--output_file", help="output_file", type=str, required=True) + argparser.add_argument("--output-file", help="output_file", type=str, required=True) args = argparser.parse_args() det_status = TextDB(args.det_status, lazy=True) diff --git a/workflow/src/legenddataflow/scripts/merge_channels.py b/workflow/src/legenddataflow/scripts/merge_channels.py index 6fee6f5..4fe1d28 100644 --- a/workflow/src/legenddataflow/scripts/merge_channels.py +++ b/workflow/src/legenddataflow/scripts/merge_channels.py @@ -31,13 +31,13 @@ def merge_channels() -> None: ) argparser.add_argument("--output", help="output file", type=str, required=True) argparser.add_argument( - "--in_db", + "--in-db", help="in db file (used for when lh5 files referred to in db)", type=str, required=False, ) argparser.add_argument( - "--out_db", + "--out-db", help="lh5 file (used for when lh5 files referred to in db)", type=str, required=False, diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py index 2b0004b..16343dc 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py @@ -17,8 +17,8 @@ def par_geds_dsp_dplms() -> None: argparser = argparse.ArgumentParser() - argparser.add_argument("--fft_raw_filelist", help="fft_raw_filelist", type=str) - argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) + argparser.add_argument("--fft-raw-filelist", help="fft_raw_filelist", type=str) + argparser.add_argument("--peak-file", help="tcm_filelist", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) argparser.add_argument("--database", help="database", type=str, required=True) @@ -30,9 +30,9 @@ def par_geds_dsp_dplms() -> None: argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) - argparser.add_argument("--lh5_path", help="lh5_path", type=str, required=True) - argparser.add_argument("--plot_path", help="plot_path", type=str) + argparser.add_argument("--dsp-pars", help="dsp_pars", type=str, required=True) + argparser.add_argument("--lh5-path", help="lh5_path", type=str, required=True) + argparser.add_argument("--plot-path", help="plot_path", type=str) args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py index 4b755c2..6376ed5 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py @@ -28,8 +28,8 @@ def par_geds_dsp_eopt() -> None: argparser = argparse.ArgumentParser() - argparser.add_argument("--peak_file", help="tcm_filelist", type=str, required=True) - argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) + argparser.add_argument("--peak-file", help="tcm_filelist", type=str, required=True) + 
argparser.add_argument("--decay-const", help="decay_const", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str) argparser.add_argument("--log", help="log_file", type=str) @@ -41,13 +41,13 @@ def par_geds_dsp_eopt() -> None: argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument( - "--final_dsp_pars", help="final_dsp_pars", type=str, required=True + "--final-dsp-pars", help="final_dsp_pars", type=str, required=True ) argparser.add_argument("--qbb_grid_path", help="qbb_grid_path", type=str) argparser.add_argument("--plot_path", help="plot_path", type=str) argparser.add_argument( - "--plot_save_path", help="plot_save_path", type=str, required=False + "--plot-save-path", help="plot_save_path", type=str, required=False ) args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py index e9b1de6..afd4a0b 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py @@ -82,17 +82,17 @@ def get_out_data( def par_geds_dsp_evtsel() -> None: argparser = argparse.ArgumentParser() - argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) + argparser.add_argument("--raw-filelist", help="raw_filelist", type=str) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, required=False + "--tcm-filelist", help="tcm_filelist", type=str, required=False ) argparser.add_argument( - "--pulser_file", help="pulser_file", type=str, required=False + "--pulser-file", help="pulser_file", type=str, required=False ) argparser.add_argument("--decay_const", help="decay_const", type=str, required=True) argparser.add_argument( - "--raw_cal", help="raw_cal", type=str, nargs="*", required=True + "--raw-cal", help="raw_cal", type=str, nargs="*", required=True ) argparser.add_argument("--log", help="log_file", type=str) @@ -103,7 +103,7 @@ def par_geds_dsp_evtsel() -> None: argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--peak_file", help="peak_file", type=str, required=True) + argparser.add_argument("--peak-file", help="peak_file", type=str, required=True) args = argparser.parse_args() configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py index 691a0e8..d720446 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/nopt.py @@ -19,7 +19,7 @@ def par_geds_dsp_nopt() -> None: sto = lh5.LH5Store() argparser = argparse.ArgumentParser() - argparser.add_argument("--raw_filelist", help="raw_filelist", type=str) + argparser.add_argument("--raw-filelist", help="raw_filelist", type=str) argparser.add_argument("--database", help="database", type=str, required=True) argparser.add_argument("--inplots", help="inplots", type=str) @@ -31,8 +31,8 @@ def par_geds_dsp_nopt() -> None: argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--dsp_pars", help="dsp_pars", type=str, required=True) - argparser.add_argument("--plot_path", help="plot_path", type=str) + 
argparser.add_argument("--dsp-pars", help="dsp_pars", type=str, required=True) + argparser.add_argument("--plot-path", help="plot_path", type=str) args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py index d4a1e22..268ca86 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm.py @@ -8,10 +8,10 @@ def par_geds_dsp_svm() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--log", help="log file", type=str) argparser.add_argument( - "--output_file", help="output par file", type=str, required=True + "--output-file", help="output par file", type=str, required=True ) argparser.add_argument( - "--input_file", help="input par file", type=str, required=True + "--input-file", help="input par file", type=str, required=True ) argparser.add_argument("--svm_file", help="svm file", required=True) args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py index 162ccfa..6ae5764 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/svm_build.py @@ -19,13 +19,13 @@ def par_geds_dsp_svm_build() -> None: argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument( - "--output_file", help="output SVM file", type=str, required=True + "--output-file", help="output SVM file", type=str, required=True ) argparser.add_argument( - "--train_data", help="input data file", type=str, required=True + "--train-data", help="input data file", type=str, required=True ) argparser.add_argument( - "--train_hyperpars", help="input hyperparameter file", required=True + "--train-hyperpars", help="input hyperparameter file", required=True ) args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py index 4d493a1..a86e531 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py @@ -24,16 +24,16 @@ def par_geds_dsp_tau() -> None: argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--plot_path", help="plot path", type=str, required=False) - argparser.add_argument("--output_file", help="output file", type=str, required=True) + argparser.add_argument("--plot-path", help="plot path", type=str, required=False) + argparser.add_argument("--output-file", help="output file", type=str, required=True) argparser.add_argument( - "--pulser_file", help="pulser file", type=str, required=False + "--pulser-file", help="pulser file", type=str, required=False ) - argparser.add_argument("--raw_files", help="input files", nargs="*", type=str) + argparser.add_argument("--raw-files", help="input files", nargs="*", type=str) argparser.add_argument( - "--tcm_files", help="tcm_files", nargs="*", type=str, required=False + "--tcm-files", help="tcm_files", nargs="*", type=str, required=False ) args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py b/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py index 2b6c6e1..74ece8e 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py +++ 
b/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py @@ -50,14 +50,14 @@ def par_geds_hit_aoe() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("files", help="files", nargs="*", type=str) argparser.add_argument( - "--pulser_file", help="pulser_file", type=str, required=False + "--pulser-file", help="pulser_file", type=str, required=False ) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, required=False + "--tcm-filelist", help="tcm_filelist", type=str, required=False ) - argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) - argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) + argparser.add_argument("--ecal-file", help="ecal_file", type=str, required=True) + argparser.add_argument("--eres-file", help="eres_file", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) argparser.add_argument("--configs", help="configs", type=str, required=True) @@ -68,9 +68,9 @@ def par_geds_hit_aoe() -> None: argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) - argparser.add_argument("--hit_pars", help="hit_pars", type=str) - argparser.add_argument("--aoe_results", help="aoe_results", type=str) + argparser.add_argument("--plot-file", help="plot_file", type=str, required=False) + argparser.add_argument("--hit-pars", help="hit_pars", type=str) + argparser.add_argument("--aoe-results", help="aoe_results", type=str) argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py b/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py index c67e304..c763433 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py @@ -439,15 +439,15 @@ def par_geds_hit_ecal() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="filelist", nargs="*", type=str) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, required=False + "--tcm-filelist", help="tcm_filelist", type=str, required=False ) argparser.add_argument( - "--pulser_file", help="pulser_file", type=str, required=False + "--pulser-file", help="pulser_file", type=str, required=False ) - argparser.add_argument("--ctc_dict", help="ctc_dict", nargs="*") - argparser.add_argument("--in_hit_dict", help="in_hit_dict", required=False) - argparser.add_argument("--inplot_dict", help="inplot_dict", required=False) + argparser.add_argument("--ctc-dict", help="ctc_dict", nargs="*") + argparser.add_argument("--in-hit-dict", help="in_hit_dict", required=False) + argparser.add_argument("--inplot-dict", help="inplot_dict", required=False) argparser.add_argument("--configs", help="config", type=str, required=True) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) @@ -459,9 +459,9 @@ def par_geds_hit_ecal() -> None: argparser.add_argument("--log", help="log_file", type=str) - argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) - argparser.add_argument("--save_path", help="save_path", type=str) - argparser.add_argument("--results_path", help="results_path", type=str) + argparser.add_argument("--plot-path", help="plot_path", type=str, 
required=False) + argparser.add_argument("--save-path", help="save_path", type=str) + argparser.add_argument("--results-path", help="results_path", type=str) argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py b/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py index 357fe33..b4dc3f2 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py @@ -49,14 +49,14 @@ def par_geds_hit_lq() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("files", help="files", nargs="*", type=str) argparser.add_argument( - "--pulser_file", help="pulser_file", type=str, required=False + "--pulser-file", help="pulser_file", type=str, required=False ) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, required=False + "--tcm-filelist", help="tcm_filelist", type=str, required=False ) - argparser.add_argument("--ecal_file", help="ecal_file", type=str, required=True) - argparser.add_argument("--eres_file", help="eres_file", type=str, required=True) + argparser.add_argument("--ecal-file", help="ecal_file", type=str, required=True) + argparser.add_argument("--eres-file", help="eres_file", type=str, required=True) argparser.add_argument("--inplots", help="in_plot_path", type=str, required=False) argparser.add_argument("--configs", help="configs", type=str, required=True) @@ -67,9 +67,9 @@ def par_geds_hit_lq() -> None: argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--channel", help="Channel", type=str, required=True) - argparser.add_argument("--plot_file", help="plot_file", type=str, required=False) - argparser.add_argument("--hit_pars", help="hit_pars", type=str) - argparser.add_argument("--lq_results", help="lq_results", type=str) + argparser.add_argument("--plot-file", help="plot_file", type=str, required=False) + argparser.add_argument("--hit-pars", help="hit_pars", type=str) + argparser.add_argument("--lq-results", help="lq_results", type=str) argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py b/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py index acc1a32..33934c4 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py @@ -27,17 +27,17 @@ def par_geds_hit_qc() -> None: argparser = argparse.ArgumentParser() - argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) - argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument("--cal-files", help="cal_files", nargs="*", type=str) + argparser.add_argument("--fft-files", help="fft_files", nargs="*", type=str) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, required=False + "--tcm-filelist", help="tcm_filelist", type=str, required=False ) argparser.add_argument( - "--pulser_file", help="pulser_file", type=str, required=False + "--pulser-file", help="pulser_file", type=str, required=False ) argparser.add_argument( - "--overwrite_files", + "--overwrite-files", help="overwrite_files", type=str, required=False, @@ -53,8 +53,8 @@ def par_geds_hit_qc() -> None: argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument("--tier", help="tier", 
type=str, default="hit") - argparser.add_argument("--plot_path", help="plot_path", type=str, required=False) - argparser.add_argument("--save_path", help="save_path", type=str) + argparser.add_argument("--plot-path", help="plot_path", type=str, required=False) + argparser.add_argument("--save-path", help="save_path", type=str) args = argparser.parse_args() configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py b/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py index 12c70f8..76383ef 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py @@ -224,19 +224,19 @@ def eres_func(x): def par_geds_pht_aoe() -> None: argparser = argparse.ArgumentParser() argparser.add_argument( - "--input_files", help="files", type=str, nargs="*", required=True + "--input-files", help="files", type=str, nargs="*", required=True ) argparser.add_argument( - "--pulser_files", help="pulser_file", nargs="*", type=str, required=False + "--pulser-files", help="pulser_file", nargs="*", type=str, required=False ) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False + "--tcm-filelist", help="tcm_filelist", type=str, nargs="*", required=False ) argparser.add_argument( - "--ecal_file", help="ecal_file", type=str, nargs="*", required=True + "--ecal-file", help="ecal_file", type=str, nargs="*", required=True ) argparser.add_argument( - "--eres_file", help="eres_file", type=str, nargs="*", required=True + "--eres-file", help="eres_file", type=str, nargs="*", required=True ) argparser.add_argument( "--inplots", help="eres_file", type=str, nargs="*", required=True @@ -251,10 +251,10 @@ def par_geds_pht_aoe() -> None: argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument( - "--plot_file", help="plot_file", type=str, nargs="*", required=False + "--plot-file", help="plot_file", type=str, nargs="*", required=False ) - argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) - argparser.add_argument("--aoe_results", help="aoe_results", nargs="*", type=str) + argparser.add_argument("--hit-pars", help="hit_pars", nargs="*", type=str) + argparser.add_argument("--aoe-results", help="aoe_results", nargs="*", type=str) argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py b/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py index 560a063..6d9babf 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py @@ -424,19 +424,19 @@ def calibrate_partition( if __name__ == "__main__": argparser = argparse.ArgumentParser() argparser.add_argument( - "--input_files", help="files", type=str, nargs="*", required=True + "--input-files", help="files", type=str, nargs="*", required=True ) argparser.add_argument( - "--pulser_files", help="pulser_file", nargs="*", type=str, required=False + "--pulser-files", help="pulser_file", nargs="*", type=str, required=False ) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False + "--tcm-filelist", help="tcm_filelist", type=str, nargs="*", required=False ) argparser.add_argument( - "--ecal_file", help="ecal_file", type=str, nargs="*", required=True + "--ecal-file", 
help="ecal_file", type=str, nargs="*", required=True ) argparser.add_argument( - "--eres_file", help="eres_file", type=str, nargs="*", required=True + "--eres-file", help="eres_file", type=str, nargs="*", required=True ) argparser.add_argument( "--inplots", help="eres_file", type=str, nargs="*", required=True @@ -451,10 +451,10 @@ def calibrate_partition( argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument( - "--plot_file", help="plot_file", type=str, nargs="*", required=False + "--plot-file", help="plot_file", type=str, nargs="*", required=False ) - argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) - argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + argparser.add_argument("--hit-pars", help="hit_pars", nargs="*", type=str) + argparser.add_argument("--fit-results", help="fit_results", nargs="*", type=str) argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py b/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py index 0faa42d..c3089e1 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py @@ -45,19 +45,19 @@ def run_splitter(files): def par_geds_pht_fast() -> None: argparser = argparse.ArgumentParser() argparser.add_argument( - "--input_files", help="files", type=str, nargs="*", required=True + "--input-files", help="files", type=str, nargs="*", required=True ) argparser.add_argument( - "--pulser_files", help="pulser_file", nargs="*", type=str, required=False + "--pulser-files", help="pulser_file", nargs="*", type=str, required=False ) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False + "--tcm-filelist", help="tcm_filelist", type=str, nargs="*", required=False ) argparser.add_argument( - "--ecal_file", help="ecal_file", type=str, nargs="*", required=True + "--ecal-file", help="ecal_file", type=str, nargs="*", required=True ) argparser.add_argument( - "--eres_file", help="eres_file", type=str, nargs="*", required=True + "--eres-file", help="eres_file", type=str, nargs="*", required=True ) argparser.add_argument( "--inplots", help="eres_file", type=str, nargs="*", required=True @@ -72,10 +72,10 @@ def par_geds_pht_fast() -> None: argparser.add_argument("--log", help="log_file", type=str) argparser.add_argument( - "--plot_file", help="plot_file", type=str, nargs="*", required=False + "--plot-file", help="plot_file", type=str, nargs="*", required=False ) - argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) - argparser.add_argument("--fit_results", help="fit_results", nargs="*", type=str) + argparser.add_argument("--hit-pars", help="hit_pars", nargs="*", type=str) + argparser.add_argument("--fit-results", help="fit_results", nargs="*", type=str) argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py b/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py index 78c8c6e..f46914c 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py @@ -241,19 +241,19 @@ def eres_func(x): if __name__ == "__main__": argparser = argparse.ArgumentParser() argparser.add_argument( - "--input_files", help="files", type=str, nargs="*", required=True + 
"--input-files", help="files", type=str, nargs="*", required=True ) argparser.add_argument( - "--pulser_files", help="pulser_file", type=str, nargs="*", required=False + "--pulser-files", help="pulser_file", type=str, nargs="*", required=False ) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", type=str, nargs="*", required=False + "--tcm-filelist", help="tcm_filelist", type=str, nargs="*", required=False ) argparser.add_argument( - "--ecal_file", help="ecal_file", type=str, nargs="*", required=True + "--ecal-file", help="ecal_file", type=str, nargs="*", required=True ) argparser.add_argument( - "--eres_file", help="eres_file", type=str, nargs="*", required=True + "--eres-file", help="eres_file", type=str, nargs="*", required=True ) argparser.add_argument( "--inplots", help="eres_file", type=str, nargs="*", required=True @@ -268,10 +268,10 @@ def eres_func(x): argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument( - "--plot_file", help="plot_file", type=str, nargs="*", required=False + "--plot-file", help="plot_file", type=str, nargs="*", required=False ) - argparser.add_argument("--hit_pars", help="hit_pars", nargs="*", type=str) - argparser.add_argument("--lq_results", help="lq_results", nargs="*", type=str) + argparser.add_argument("--hit-pars", help="hit_pars", nargs="*", type=str) + argparser.add_argument("--lq-results", help="lq_results", nargs="*", type=str) argparser.add_argument("-d", "--debug", help="debug_mode", action="store_true") args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py b/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py index af6dc95..c6ac3df 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py @@ -27,17 +27,17 @@ if __name__ == "__main__": argparser = argparse.ArgumentParser() - argparser.add_argument("--cal_files", help="cal_files", nargs="*", type=str) - argparser.add_argument("--fft_files", help="fft_files", nargs="*", type=str) + argparser.add_argument("--cal-files", help="cal_files", nargs="*", type=str) + argparser.add_argument("--fft-files", help="fft_files", nargs="*", type=str) argparser.add_argument( - "--tcm_filelist", help="tcm_filelist", nargs="*", type=str, required=False + "--tcm-filelist", help="tcm_filelist", nargs="*", type=str, required=False ) argparser.add_argument( - "--pulser_files", help="pulser_file", nargs="*", type=str, required=False + "--pulser-files", help="pulser_file", nargs="*", type=str, required=False ) argparser.add_argument( - "--overwrite_files", help="overwrite_files", nargs="*", type=str, required=False + "--overwrite-files", help="overwrite_files", nargs="*", type=str, required=False ) argparser.add_argument("--configs", help="config", type=str, required=True) @@ -49,10 +49,10 @@ argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument( - "--plot_path", help="plot_path", type=str, nargs="*", required=False + "--plot-path", help="plot_path", type=str, nargs="*", required=False ) argparser.add_argument( - "--save_path", + "--save-path", help="save_path", type=str, nargs="*", diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py b/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py index 38f5e20..9007ad7 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py @@ -26,7 +26,7 @@ if __name__ == 
"__main__": argparser = argparse.ArgumentParser() - argparser.add_argument("--phy_files", help="cal_files", nargs="*", type=str) + argparser.add_argument("--phy-files", help="cal_files", nargs="*", type=str) argparser.add_argument("--configs", help="config", type=str, required=True) argparser.add_argument("--metadata", help="metadata path", type=str, required=True) @@ -37,10 +37,10 @@ argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument( - "--plot_path", help="plot_path", type=str, nargs="*", required=False + "--plot-path", help="plot_path", type=str, nargs="*", required=False ) argparser.add_argument( - "--save_path", + "--save-path", help="save_path", type=str, nargs="*", diff --git a/workflow/src/legenddataflow/scripts/par/geds/psp/average.py b/workflow/src/legenddataflow/scripts/par/geds/psp/average.py index 65508a2..3ba1423 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/psp/average.py +++ b/workflow/src/legenddataflow/scripts/par/geds/psp/average.py @@ -24,16 +24,16 @@ def par_geds_psp_average() -> None: "--output", help="output file", nargs="*", type=str, required=True ) argparser.add_argument( - "--in_plots", help="input plot files", nargs="*", type=str, required=False + "--in-plots", help="input plot files", nargs="*", type=str, required=False ) argparser.add_argument( - "--out_plots", help="output plot files", nargs="*", type=str, required=False + "--out-plots", help="output plot files", nargs="*", type=str, required=False ) argparser.add_argument( - "--in_obj", help="input object files", nargs="*", type=str, required=False + "--in-obj", help="input object files", nargs="*", type=str, required=False ) argparser.add_argument( - "--out_obj", help="output object files", nargs="*", type=str, required=False + "--out-obj", help="output object files", nargs="*", type=str, required=False ) argparser.add_argument("--log", help="log_file", type=str) diff --git a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py index a937458..eeaaf2b 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py @@ -24,8 +24,8 @@ def par_geds_raw_blindcal() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="files", nargs="*", type=str) - argparser.add_argument("--blind_curve", help="blind_curve", type=str) - argparser.add_argument("--plot_file", help="out plot path", type=str) + argparser.add_argument("--blind-curve", help="blind_curve", type=str) + argparser.add_argument("--plot-file", help="out plot path", type=str) argparser.add_argument("--meta", help="meta", type=str) argparser.add_argument("--configs", help="configs", type=str) diff --git a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py index 7f645c1..5f60c54 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py @@ -30,9 +30,9 @@ def par_geds_raw_blindcheck() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--files", help="files", nargs="*", type=str) argparser.add_argument("--output", help="output file", type=str) - argparser.add_argument("--plot_file", help="plot file", type=str) + argparser.add_argument("--plot-file", help="plot file", type=str) argparser.add_argument( - "--blind_curve", help="blinding curves file", 
nargs="*", type=str + "--blind-curve", help="blinding curves file", nargs="*", type=str ) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) diff --git a/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py b/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py index ab5f400..4c75d62 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py +++ b/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py @@ -21,10 +21,10 @@ def par_geds_tcm_pulser() -> None: argparser.add_argument("--channel", help="Channel", type=str, required=True) argparser.add_argument( - "--pulser_file", help="pulser file", type=str, required=False + "--pulser-file", help="pulser file", type=str, required=False ) - argparser.add_argument("--tcm_files", help="tcm_files", nargs="*", type=str) + argparser.add_argument("--tcm-files", help="tcm_files", nargs="*", type=str) args = argparser.parse_args() configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) diff --git a/workflow/src/legenddataflow/scripts/tier/dsp.py b/workflow/src/legenddataflow/scripts/tier/dsp.py index 906985b..33d8f9c 100644 --- a/workflow/src/legenddataflow/scripts/tier/dsp.py +++ b/workflow/src/legenddataflow/scripts/tier/dsp.py @@ -38,12 +38,12 @@ def build_tier_dsp() -> None: argparser.add_argument("--tier", help="Tier", type=str, required=True) argparser.add_argument( - "--pars_file", help="database file for detector", nargs="*", default=[] + "--pars-file", help="database file for detector", nargs="*", default=[] ) argparser.add_argument("--input", help="input file", type=str) argparser.add_argument("--output", help="output file", type=str) - argparser.add_argument("--db_file", help="db file", type=str) + argparser.add_argument("--db-file", help="db file", type=str) args = argparser.parse_args() configs = TextDB(args.configs, lazy=True) diff --git a/workflow/src/legenddataflow/scripts/tier/evt.py b/workflow/src/legenddataflow/scripts/tier/evt.py index 15a76d1..195fbd6 100644 --- a/workflow/src/legenddataflow/scripts/tier/evt.py +++ b/workflow/src/legenddataflow/scripts/tier/evt.py @@ -33,12 +33,12 @@ def find_matching_values_with_delay(arr1, arr2, jit_delay): def build_tier_evt() -> None: argparser = argparse.ArgumentParser() - argparser.add_argument("--hit_file", help="hit file", type=str) - argparser.add_argument("--dsp_file", help="dsp file", type=str) - argparser.add_argument("--tcm_file", help="tcm file", type=str) - argparser.add_argument("--ann_file", help="ann file") - argparser.add_argument("--xtc_file", help="xtc file", type=str) - argparser.add_argument("--par_files", help="par files", nargs="*") + argparser.add_argument("--hit-file", help="hit file", type=str) + argparser.add_argument("--dsp-file", help="dsp file", type=str) + argparser.add_argument("--tcm-file", help="tcm file", type=str) + argparser.add_argument("--ann-file", help="ann file") + argparser.add_argument("--xtc-file", help="xtc file", type=str) + argparser.add_argument("--par-files", help="par files", nargs="*") argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) diff --git a/workflow/src/legenddataflow/scripts/tier/hit.py b/workflow/src/legenddataflow/scripts/tier/hit.py index 9fd489f..ffaf25b 100644 --- a/workflow/src/legenddataflow/scripts/tier/hit.py +++ 
b/workflow/src/legenddataflow/scripts/tier/hit.py @@ -13,7 +13,7 @@ def build_tier_hit() -> None: argparser = argparse.ArgumentParser() argparser.add_argument("--input", help="input file", type=str) - argparser.add_argument("--pars_file", help="hit pars file", nargs="*") + argparser.add_argument("--pars-file", help="hit pars file", nargs="*") argparser.add_argument("--configs", help="configs", type=str, required=True) argparser.add_argument("--metadata", help="metadata", type=str, required=True) @@ -24,7 +24,7 @@ def build_tier_hit() -> None: argparser.add_argument("--tier", help="Tier", type=str, required=True) argparser.add_argument("--output", help="output file", type=str) - argparser.add_argument("--db_file", help="db file", type=str) + argparser.add_argument("--db-file", help="db file", type=str) args = argparser.parse_args() configs = TextDB(args.configs, lazy=True) diff --git a/workflow/src/legenddataflow/scripts/tier/raw_blind.py b/workflow/src/legenddataflow/scripts/tier/raw_blind.py index 19eb023..8fa827a 100644 --- a/workflow/src/legenddataflow/scripts/tier/raw_blind.py +++ b/workflow/src/legenddataflow/scripts/tier/raw_blind.py @@ -27,12 +27,12 @@ def build_tier_raw_blind() -> None: argparser.add_argument("--input", help="input file", type=str) argparser.add_argument("--output", help="output file", type=str) argparser.add_argument( - "--blind_curve", help="blinding curves file", type=str, required=True, nargs="*" + "--blind-curve", help="blinding curves file", type=str, required=True, nargs="*" ) argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--configs", help="config file", type=str) - argparser.add_argument("--chan_maps", help="chan map", type=str) + argparser.add_argument("--chan-maps", help="chan map", type=str) argparser.add_argument("--metadata", help="metadata", type=str) argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/tier/raw_fcio.py b/workflow/src/legenddataflow/scripts/tier/raw_fcio.py index fefc8a1..c52f441 100644 --- a/workflow/src/legenddataflow/scripts/tier/raw_fcio.py +++ b/workflow/src/legenddataflow/scripts/tier/raw_fcio.py @@ -17,7 +17,7 @@ def build_tier_raw_fcio() -> None: argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--configs", help="config file", type=str) - argparser.add_argument("--chan_maps", help="chan map", type=str) + argparser.add_argument("--chan-maps", help="chan map", type=str) argparser.add_argument("--log", help="log file", type=str) args = argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/tier/raw_orca.py b/workflow/src/legenddataflow/scripts/tier/raw_orca.py index 00d7751..ca6a9f3 100644 --- a/workflow/src/legenddataflow/scripts/tier/raw_orca.py +++ b/workflow/src/legenddataflow/scripts/tier/raw_orca.py @@ -17,7 +17,7 @@ def build_tier_raw_orca() -> None: argparser.add_argument("--datatype", help="Datatype", type=str, required=True) argparser.add_argument("--timestamp", help="Timestamp", type=str, required=True) argparser.add_argument("--configs", help="config file", type=str) - argparser.add_argument("--chan_maps", help="chan map", type=str) + argparser.add_argument("--chan-maps", help="chan map", type=str) argparser.add_argument("--log", help="log file") args 
= argparser.parse_args() diff --git a/workflow/src/legenddataflow/scripts/tier/skm.py b/workflow/src/legenddataflow/scripts/tier/skm.py index a698629..f194a00 100644 --- a/workflow/src/legenddataflow/scripts/tier/skm.py +++ b/workflow/src/legenddataflow/scripts/tier/skm.py @@ -23,7 +23,7 @@ def get_all_out_fields(input_table, out_fields, current_field=""): def build_tier_skm() -> None: argparser = argparse.ArgumentParser() - argparser.add_argument("--evt_file", help="evt file", required=True) + argparser.add_argument("--evt-file", help="evt file", required=True) argparser.add_argument("--configs", help="configs", required=True) argparser.add_argument("--datatype", help="datatype", required=True) argparser.add_argument("--timestamp", help="timestamp", required=True) From ccfcbce091ed0bdde6605d1b445ae3360c3cb72f Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 5 Feb 2025 12:25:04 +0100 Subject: [PATCH 095/101] rename execenv_smk_py_script to execenv_pyexe --- workflow/rules/ann.smk | 12 +++++----- workflow/rules/blinding_calibration.smk | 11 ++++------ workflow/rules/blinding_check.smk | 10 ++++----- workflow/rules/chanlist_gen.smk | 11 +++++----- workflow/rules/channel_merge.smk | 10 ++++----- workflow/rules/dsp.smk | 5 ++--- workflow/rules/dsp_pars_geds.smk | 23 +++++++------------- workflow/rules/evt.smk | 11 ++++------ workflow/rules/hit.smk | 5 ++--- workflow/rules/hit_pars_geds.smk | 14 +++++------- workflow/rules/pht.smk | 5 ++--- workflow/rules/pht_pars_geds.smk | 29 +++++++++---------------- workflow/rules/pht_pars_geds_fast.smk | 8 +++---- workflow/rules/psp.smk | 5 ++--- workflow/rules/psp_pars_geds.smk | 14 +++++------- workflow/rules/qc_phy.smk | 14 +++++------- workflow/rules/raw.smk | 11 ++++------ workflow/rules/skm.smk | 5 ++--- workflow/rules/tcm.smk | 8 +++---- workflow/src/legenddataflow/__init__.py | 4 ++-- workflow/src/legenddataflow/execenv.py | 13 +++++------ 21 files changed, 88 insertions(+), 140 deletions(-) diff --git a/workflow/rules/ann.smk b/workflow/rules/ann.smk index 5cdd016..1e48623 100644 --- a/workflow/rules/ann.smk +++ b/workflow/rules/ann.smk @@ -9,7 +9,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe rule build_ann: @@ -30,11 +30,10 @@ rule build_ann: runtime=300, mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: - f'{execenv_smk_py_script(config, "build-tier-dsp")}' - "--log {log} " + execenv_pyexe(config, "build-tier-dsp") + "--log {log} " "--configs {configs} " "--metadata {meta} " - f"--tier ann " + "--tier ann " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--input {input.dsp_file} " @@ -61,11 +60,10 @@ rule build_pan: runtime=300, mem_swap=lambda wildcards: 25 if wildcards.datatype == "cal" else 15, shell: - f'{execenv_smk_py_script(config, "build-tier-dsp")}' - "--log {log} " + execenv_pyexe(config, "build-tier-dsp") + "--log {log} " "--configs {configs} " "--metadata {meta} " - f"--tier pan " + "--tier pan " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--input {input.dsp_file} " diff --git a/workflow/rules/blinding_calibration.smk b/workflow/rules/blinding_calibration.smk index 31e71a8..a4dcc1e 100644 --- a/workflow/rules/blinding_calibration.smk +++ b/workflow/rules/blinding_calibration.smk @@ -12,7 +12,7 @@ from legenddataflow.patterns import ( get_pattern_log_channel, ) from pathlib import Path -from legenddataflow.execenv 
import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe rule build_blinding_calibration: @@ -38,8 +38,7 @@ rule build_blinding_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-raw-blindcal")}' - "--log {log} " + execenv_pyexe(config, "par-geds-raw-blindcal") + "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -66,8 +65,7 @@ rule build_plts_blinding: group: "merge-blindcal" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' - "--input {input} " + execenv_pyexe(config, "merge-channels") + "--input {input} " "--output {output} " @@ -88,6 +86,5 @@ rule build_pars_blinding: group: "merge-blindcal" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' - "--input {input.infiles} " + execenv_pyexe(config, "merge-channels") + "--input {input.infiles} " "--output {output} " diff --git a/workflow/rules/blinding_check.smk b/workflow/rules/blinding_check.smk index 2bee385..0822d9d 100644 --- a/workflow/rules/blinding_check.smk +++ b/workflow/rules/blinding_check.smk @@ -12,7 +12,7 @@ from legenddataflow.patterns import ( get_pattern_plts, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe from pathlib import Path @@ -39,8 +39,7 @@ rule build_blinding_check: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-raw-blindcheck")}' - "--log {log} " + execenv_pyexe(config, "par-geds-raw-blindcheck") + "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -67,8 +66,7 @@ rule build_plts_raw: group: "merge-raw" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' - "--input {input} " + execenv_pyexe(config, "merge-channels") + "--input {input} " "--output {output} " @@ -91,4 +89,4 @@ rule build_pars_raw: group: "merge-raw" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' + execenv_pyexe(config, "merge-channels") diff --git a/workflow/rules/chanlist_gen.smk b/workflow/rules/chanlist_gen.smk index 750104b..0f30cc0 100644 --- a/workflow/rules/chanlist_gen.smk +++ b/workflow/rules/chanlist_gen.smk @@ -9,7 +9,7 @@ from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_plts_tmp_channel, ) -from legenddataflow import execenv_smk_py_script +from legenddataflow import execenv_pyexe from legenddataflow.utils import filelist_path @@ -23,10 +23,11 @@ def get_chanlist(setup, keypart, workflow, config, det_status, chan_maps): f"all-{key.experiment}-{key.period}-{key.run}-cal-{key.timestamp}-channels.chankeylist.{random.randint(0,99999):05d}", ) - cmd = execenv_smk_py_script(config, "create-chankeylist") - cmd += f" --det-status {det_status} --channelmap {chan_maps} --timestamp {key.timestamp} " - cmd += f"--datatype cal --output-file {output_file}" - os.system(cmd) + os.system( + execenv_pyexe(config, "create-chankeylist") + + "--det-status {det_status} --channelmap {chan_maps} --timestamp {key.timestamp} " + "--datatype cal --output-file {output_file}" + ) with open(output_file) as r: chan_list = r.read().splitlines() diff --git a/workflow/rules/channel_merge.smk b/workflow/rules/channel_merge.smk index b221fc3..42b06c9 100644 --- a/workflow/rules/channel_merge.smk +++ b/workflow/rules/channel_merge.smk @@ -8,7 +8,7 @@ from legenddataflow.patterns import ( ) from legenddataflow.utils import set_last_rule_name import inspect -from legenddataflow.execenv import 
execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): if lh5_tier is None: @@ -31,7 +31,7 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): group: f"merge-{tier}" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' + execenv_pyexe(config, "merge-channels") + \ "--input {input} " "--output {output} " "--channelmap {meta} " @@ -64,7 +64,7 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): group: f"merge-{tier}" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' + execenv_pyexe(config, "merge-channels") + \ "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -97,7 +97,7 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): group: f"merge-{tier}" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' + execenv_pyexe(config, "merge-channels") + \ "--input {input} " "--output {output} " "--timestamp {params.timestamp} " @@ -144,7 +144,7 @@ def build_merge_rules(tier, lh5_merge=False, lh5_tier=None): f"merge-{tier}" run: shell_string = ( - f'{execenv_smk_py_script(config, "merge-channels")}' + execenv_pyexe(config, "merge-channels") + \ "--output {output.out_file} " "--input {input.in_files} " "--timestamp {params.timestamp} " diff --git a/workflow/rules/dsp.smk b/workflow/rules/dsp.smk index 20c5d38..f9a9299 100644 --- a/workflow/rules/dsp.smk +++ b/workflow/rules/dsp.smk @@ -14,7 +14,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe dsp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -52,8 +52,7 @@ rule build_dsp: runtime=300, mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, shell: - f'{execenv_smk_py_script(config, "build-tier-dsp")}' - "--log {log} " + execenv_pyexe(config, "build-tier-dsp") + "--log {log} " "--tier dsp " f"--configs {ro(configs)} " "--metadata {meta} " diff --git a/workflow/rules/dsp_pars_geds.smk b/workflow/rules/dsp_pars_geds.smk index 8d1f075..52fae7c 100644 --- a/workflow/rules/dsp_pars_geds.smk +++ b/workflow/rules/dsp_pars_geds.smk @@ -13,7 +13,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe rule build_pars_dsp_tau_geds: @@ -36,8 +36,7 @@ rule build_pars_dsp_tau_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-tau")}' - "--configs {configs} " + execenv_pyexe(config, "par-geds-dsp-tau") + "--configs {configs} " "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -70,8 +69,7 @@ rule build_pars_evtsel_geds: runtime=300, mem_swap=70, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-evtsel")}' - "--configs {configs} " + execenv_pyexe(config, "par-geds-dsp-evtsel") + "--configs {configs} " "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -107,8 +105,7 @@ rule build_pars_dsp_nopt_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-nopt")}' - "--database {input.database} " + execenv_pyexe(config, "par-geds-dsp-nopt") + "--database {input.database} " "--configs {configs} " "--log {log} " "--datatype {params.datatype} " @@ -144,9 +141,8 @@ rule build_pars_dsp_dplms_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-dplms")}' + 
execenv_pyexe(config, "par-geds-dsp-dplms") + "--peak-file {input.peak_file} " "--fft-raw-filelist {input.fft_files} " - "--peak-file {input.peak_file} " "--database {input.database} " "--inplots {input.inplots} " "--configs {configs} " @@ -182,8 +178,7 @@ rule build_pars_dsp_eopt_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-eopt")}' - "--log {log} " + execenv_pyexe(config, "par-geds-dsp-eopt") + "--log {log} " "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -213,8 +208,7 @@ rule build_svm_dsp_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-svm-build")}' - "--log {log} " + execenv_pyexe(config, "par-geds-dsp-svm-build") + "--log {log} " "--train-data {input.train_data} " "--train-hyperpars {input.hyperpars} " "--output-file {output.dsp_pars}" @@ -233,8 +227,7 @@ rule build_pars_dsp_svm_geds: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-svm")}' - "--log {log} " + execenv_pyexe(config, "par-geds-dsp-svm") + "--log {log} " "--input-file {input.dsp_pars} " "--output-file {output.dsp_pars} " "--svm-file {input.svm_file}" diff --git a/workflow/rules/evt.smk b/workflow/rules/evt.smk index cc72249..6a8147e 100644 --- a/workflow/rules/evt.smk +++ b/workflow/rules/evt.smk @@ -9,7 +9,7 @@ from legenddataflow.patterns import ( get_pattern_pars, get_pattern_log_concat, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe rule build_evt: @@ -44,8 +44,7 @@ rule build_evt: mem_swap=50, run: shell_string = ( - f'{execenv_smk_py_script(config, "build-tier-evt")}' - f"--configs {ro(configs)} " + execenv_pyexe(config, "build-tier-evt") + f"--configs {ro(configs)} " f"--metadata {ro(meta)} " "--log {log} " "--tier {params.tier} " @@ -96,8 +95,7 @@ rule build_pet: mem_swap=50, run: shell_string = ( - f'{execenv_smk_py_script(config, "build-tier-evt")}' - f"--configs {ro(configs)} " + execenv_pyexe(config, "build-tier-evt") + f"--configs {ro(configs)} " f"--metadata {ro(meta)} " "--log {log} " "--tier {params.tier} " @@ -144,8 +142,7 @@ for evt_tier in ("evt", "pet"): group: "tier-evt" shell: - f'{execenv_smk_py_script(config, "lh5concat")}' - "--verbose --overwrite " + execenv_pyexe(config, "lh5concat") + "--verbose --overwrite " "--output {output} " "-- {params.ro_input} &> {log}" diff --git a/workflow/rules/hit.smk b/workflow/rules/hit.smk index 469b0f5..3d4926c 100644 --- a/workflow/rules/hit.smk +++ b/workflow/rules/hit.smk @@ -14,7 +14,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars_tmp, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe hit_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -50,8 +50,7 @@ rule build_hit: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "build-tier-hit")}' - f"--configs {ro(configs)} " + execenv_pyexe(config, "build-tier-hit") + f"--configs {ro(configs)} " "--metadata {meta} " "--log {log} " "--tier {params.tier} " diff --git a/workflow/rules/hit_pars_geds.smk b/workflow/rules/hit_pars_geds.smk index 0b0aef6..b60b88c 100644 --- a/workflow/rules/hit_pars_geds.smk +++ b/workflow/rules/hit_pars_geds.smk @@ -18,7 +18,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe # This rule builds the qc using the calibration 
dsp files and fft files @@ -46,8 +46,7 @@ rule build_qc: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-hit-qc")}' - "--log {log} " + execenv_pyexe(config, "par-geds-hit-qc") + "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -94,8 +93,7 @@ rule build_energy_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-hit-ecal")}' - "--log {log} " + execenv_pyexe(config, "par-geds-hit-ecal") + "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -142,8 +140,7 @@ rule build_aoe_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-hit-aoe")}' - "--log {log} " + execenv_pyexe(config, "par-geds-hit-aoe") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " @@ -188,8 +185,7 @@ rule build_lq_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-hit-lq")}' - "--log {log} " + execenv_pyexe(config, "par-geds-hit-lq") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " diff --git a/workflow/rules/pht.smk b/workflow/rules/pht.smk index 447cee0..1f75c4f 100644 --- a/workflow/rules/pht.smk +++ b/workflow/rules/pht.smk @@ -15,7 +15,7 @@ from legenddataflow.patterns import ( get_pattern_pars_tmp, get_pattern_log, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe pht_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -53,8 +53,7 @@ rule build_pht: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "build-tier-hit")}' - f"--configs {ro(configs)} " + execenv_pyexe(config, "build-tier-hit") + f"--configs {ro(configs)} " "--metadata {meta} " "--log {log} " "--tier {params.tier} " diff --git a/workflow/rules/pht_pars_geds.smk b/workflow/rules/pht_pars_geds.smk index cec57b5..306a46c 100644 --- a/workflow/rules/pht_pars_geds.smk +++ b/workflow/rules/pht_pars_geds.smk @@ -20,7 +20,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe pht_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -101,8 +101,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 30, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-qc")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-qc") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " @@ -152,8 +151,7 @@ rule build_pht_qc: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-qc")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-qc") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " @@ -213,8 +211,7 @@ rule build_per_energy_calibration: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-hit-ecal")}' - "--log {log} " + execenv_pyexe(config, "par-geds-hit-ecal") + "--log {log} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " "--channel {params.channel} " @@ -325,8 +322,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-ecal-part")}' - "--log {log} " + 
execenv_pyexe(config, "par-geds-pht-ecal-part") + "--log {log} " "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -385,8 +381,7 @@ rule build_pht_energy_super_calibrations: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-ecal-part")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-ecal-part") + "--log {log} " "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -506,8 +501,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-aoe")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-aoe") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " @@ -566,8 +560,7 @@ rule build_pht_aoe_calibrations: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-aoe")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-aoe") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " @@ -685,8 +678,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 15, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-lq")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-lq") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " @@ -740,8 +732,7 @@ rule build_pht_lq_calibration: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-lq")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-lq") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " diff --git a/workflow/rules/pht_pars_geds_fast.smk b/workflow/rules/pht_pars_geds_fast.smk index c6e0232..9f4d7b4 100644 --- a/workflow/rules/pht_pars_geds_fast.smk +++ b/workflow/rules/pht_pars_geds_fast.smk @@ -11,7 +11,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe pht_fast_rules = {} @@ -106,8 +106,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 12, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-fast")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-fast") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " @@ -164,8 +163,7 @@ rule par_pht_fast: mem_swap=50, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-fast")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-fast") + "--log {log} " "--configs {configs} " "--metadata {meta} " "--datatype {params.datatype} " diff --git a/workflow/rules/psp.smk b/workflow/rules/psp.smk index 7cceea1..3dc8c3f 100644 --- a/workflow/rules/psp.smk +++ b/workflow/rules/psp.smk @@ -14,7 +14,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -52,8 +52,7 @@ rule build_psp: runtime=300, mem_swap=lambda wildcards: 35 if wildcards.datatype == "cal" else 25, shell: - f'{execenv_smk_py_script(config, "build-tier-dsp")}' - "--log {log} " + execenv_pyexe(config, "build-tier-dsp") + "--log {log} " "--tier psp " 
f"--configs {ro(configs)} " "--metadata {meta} " diff --git a/workflow/rules/psp_pars_geds.smk b/workflow/rules/psp_pars_geds.smk index 37c0836..db4fa35 100644 --- a/workflow/rules/psp_pars_geds.smk +++ b/workflow/rules/psp_pars_geds.smk @@ -14,7 +14,7 @@ from legenddataflow.patterns import ( get_pattern_pars, get_pattern_tier, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe psp_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -97,8 +97,7 @@ for key, dataset in part.datasets.items(): resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-psp-average")}' - "--log {log} " + execenv_pyexe(config, "par-geds-psp-average") + "--log {log} " "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -142,8 +141,7 @@ rule build_par_psp_fallback: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-psp-average")}' - "--log {log} " + execenv_pyexe(config, "par-geds-psp-average") + "--log {log} " "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -183,8 +181,7 @@ rule build_svm_psp: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-svm-build")}' - "--log {log} " + execenv_pyexe(config, "par-geds-dsp-svm-build") + "--log {log} " "--train-data {input.train_data} " "--train-hyperpars {input.hyperpars} " "--output-file {output.dsp_pars}" @@ -203,8 +200,7 @@ rule build_pars_psp_svm: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-dsp-svm")}' - "--log {log} " + execenv_pyexe(config, "par-geds-dsp-svm") + "--log {log} " "--input-file {input.dsp_pars} " "--output-file {output.dsp_pars} " "--svm-file {input.svm_model}" diff --git a/workflow/rules/qc_phy.smk b/workflow/rules/qc_phy.smk index 7ee105f..aaea3c0 100644 --- a/workflow/rules/qc_phy.smk +++ b/workflow/rules/qc_phy.smk @@ -11,7 +11,7 @@ from legenddataflow.patterns import ( get_pattern_log, get_pattern_pars, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe intier = "psp" @@ -67,8 +67,7 @@ for key, dataset in part.datasets.items(): mem_swap=len(part.get_filelists(partition, key, intier)) * 20, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-qc-phy")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-qc-phy") + "--log {log} " "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -108,8 +107,7 @@ rule build_pht_qc_phy: mem_swap=60, runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-pht-qc-phy")}' - "--log {log} " + execenv_pyexe(config, "par-geds-pht-qc-phy") + "--log {log} " "--configs {configs} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -146,8 +144,7 @@ rule build_plts_pht_phy: group: "merge-hit" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' - "--input {input} " + execenv_pyexe(config, "merge-channels") + "--input {input} " "--output {output} " @@ -168,6 +165,5 @@ rule build_pars_pht_phy: group: "merge-hit" shell: - f'{execenv_smk_py_script(config, "merge-channels")}' - "--input {input.infiles} " + execenv_pyexe(config, "merge-channels") + "--input {input.infiles} " "--output {output} " diff --git a/workflow/rules/raw.smk b/workflow/rules/raw.smk index 9353826..9b60a06 100644 --- a/workflow/rules/raw.smk +++ b/workflow/rules/raw.smk @@ -7,7 +7,7 @@ from legenddataflow.patterns import ( ) from legenddataflow.utils import 
set_last_rule_name from legenddataflow.create_pars_keylist import ParsKeyResolve -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe raw_par_catalog = ParsKeyResolve.get_par_catalog( ["-*-*-*-cal"], @@ -40,8 +40,7 @@ rule build_raw_orca: mem_swap=110, runtime=300, shell: - f'{execenv_smk_py_script(config, "build-tier-raw-orca")}' - "--log {log} " + execenv_pyexe(config, "build-tier-raw-orca") + "--log {log} " f"--configs {ro(configs)} " f"--chan-maps {ro(chan_maps)} " "--datatype {params.datatype} " @@ -69,8 +68,7 @@ rule build_raw_fcio: mem_swap=110, runtime=300, shell: - f'{execenv_smk_py_script(config, "build-tier-raw-fcio")}' - "--log {log} " + execenv_pyexe(config, "build-tier-raw-fcio") + "--log {log} " f"--configs {ro(configs)} " f"--chan-maps {ro(chan_maps)} " "--datatype {params.datatype} " @@ -104,8 +102,7 @@ rule build_raw_blind: mem_swap=110, runtime=300, shell: - f'{execenv_smk_py_script(config, "build-tier-raw-blind")}' - "--log {log} " + execenv_pyexe(config, "build-tier-raw-blind") + "--log {log} " f"--configs {ro(configs)} " f"--chan-maps {ro(chan_maps)} " f"--metadata {ro(meta)} " diff --git a/workflow/rules/skm.smk b/workflow/rules/skm.smk index a2dc119..d4f040b 100644 --- a/workflow/rules/skm.smk +++ b/workflow/rules/skm.smk @@ -8,7 +8,7 @@ from legenddataflow.patterns import ( get_pattern_pars, get_pattern_log_concat, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe rule build_skm: @@ -27,8 +27,7 @@ rule build_skm: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "build-tier-skm")}' - f"--configs {ro(configs)} " + execenv_pyexe(config, "build-tier-skm") + f"--configs {ro(configs)} " "--timestamp {params.timestamp} " "--log {log} " "--datatype {params.datatype} " diff --git a/workflow/rules/tcm.smk b/workflow/rules/tcm.smk index ff4e89a..f4e7b2c 100644 --- a/workflow/rules/tcm.smk +++ b/workflow/rules/tcm.smk @@ -8,7 +8,7 @@ from legenddataflow.patterns import ( get_pattern_pars_tmp_channel, get_pattern_log_channel, ) -from legenddataflow.execenv import execenv_smk_py_script +from legenddataflow.execenv import execenv_pyexe # This rule builds the tcm files each raw file @@ -29,8 +29,7 @@ rule build_tier_tcm: runtime=300, mem_swap=20, shell: - f'{execenv_smk_py_script(config, "build-tier-tcm")}' - "--log {log} " + execenv_pyexe(config, "build-tier-tcm") + "--log {log} " f"--configs {ro(configs)} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " @@ -57,8 +56,7 @@ rule build_pulser_ids: resources: runtime=300, shell: - f'{execenv_smk_py_script(config, "par-geds-tcm-pulser")}' - "--log {log} " + execenv_pyexe(config, "par-geds-tcm-pulser") + "--log {log} " f"--configs {ro(configs)} " "--datatype {params.datatype} " "--timestamp {params.timestamp} " diff --git a/workflow/src/legenddataflow/__init__.py b/workflow/src/legenddataflow/__init__.py index ca8ddbb..5392601 100644 --- a/workflow/src/legenddataflow/__init__.py +++ b/workflow/src/legenddataflow/__init__.py @@ -2,8 +2,8 @@ from .create_pars_keylist import ParsKeyResolve from .execenv import ( execenv_prefix, + execenv_pyexe, execenv_python, - execenv_smk_py_script, ) from .FileKey import ChannelProcKey, FileKey, ProcessingFileKey from .pars_loading import ParsCatalog @@ -21,8 +21,8 @@ "ParsKeyResolve", "ProcessingFileKey", "execenv_prefix", + "execenv_pyexe", "execenv_python", - "execenv_smk_py_script", "subst_vars", "subst_vars_in_snakemake_config", "unix_time", diff 
--git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index 9fd2ac0..a34e805 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -60,18 +60,15 @@ def execenv_python(config, aslist=False): return " ".join(cmdline), cmdenv -def execenv_smk_py_script(config, scriptname, aslist=False): - """Returns the command used to run a Python script for a Snakemake rule. +def execenv_pyexe(config, exename): + """Returns the command used to run a legend-dataflow executable for a Snakemake rule. - For example: `apptainer run image.sif python path/to/script.py` + For example: `apptainer run image.sif path/to/bindir/` """ - config = AttrsDict(config) - cmdline, _ = execenv_prefix(config, aslist=True) - cmdline.append(f"{config.paths.install}/bin/{scriptname} ") + # NOTE: space after the executable name + cmdline.append(f"{config.paths.install}/bin/{exename} ") - if aslist: - return cmdline return " ".join(cmdline) From 401814af092c08560e820ea10484b92e5913167d Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 5 Feb 2025 23:25:34 +0100 Subject: [PATCH 096/101] improve execenv code and add tests --- tests/test_execenv.py | 91 ++++++++++++++++ workflow/src/legenddataflow/__init__.py | 2 - workflow/src/legenddataflow/execenv.py | 136 +++++++++++++++--------- 3 files changed, 175 insertions(+), 54 deletions(-) create mode 100644 tests/test_execenv.py diff --git a/tests/test_execenv.py b/tests/test_execenv.py new file mode 100644 index 0000000..4aa354d --- /dev/null +++ b/tests/test_execenv.py @@ -0,0 +1,91 @@ +import os + +import pytest +from dbetto import AttrsDict +from legenddataflow import execenv + +os.environ["XDG_RUNTIME_DIR"] = "whatever" + + +@pytest.fixture(scope="module") +def config(): + return AttrsDict( + { + "paths": {"install": ".snakemake/software"}, + "execenv": { + "cmd": "apptainer exec", + "arg": "image.sif", + "env": { + "VAR1": "val1", + "VAR2": "val2", + }, + }, + } + ) + + +def test_execenv2str(): + assert ( + execenv._execenv2str(["cmd", "-v", "opt"], {"VAR1": "val1", "VAR2": "val2"}) + == "VAR1=val1 VAR2=val2 cmd -v opt" + ) + + +def test_execenv_prefix(config): + cmd_expr, cmd_env = execenv.execenv_prefix(config, as_string=False) + + assert cmd_expr == [ + "apptainer", + "exec", + "--env=VAR1=val1", + "--env=VAR2=val2", + "--bind=whatever", + "image.sif", + ] + assert cmd_env == config.execenv.env + + config.execenv.cmd = "docker run" + cmd_expr, cmd_env = execenv.execenv_prefix(config, as_string=False) + + assert cmd_expr == [ + "docker", + "run", + "--env=VAR1=val1", + "--env=VAR2=val2", + "--volume=whatever:whatever", + "image.sif", + ] + assert cmd_env == config.execenv.env + + config.execenv.cmd = "shifter" + config.execenv.arg = "--image=legendexp/legend-base:latest" + cmd_expr, cmd_env = execenv.execenv_prefix(config, as_string=False) + + assert cmd_expr == [ + "shifter", + "--env=VAR1=val1", + "--env=VAR2=val2", + "--volume=whatever:whatever", + "--image=legendexp/legend-base:latest", + ] + assert cmd_env == config.execenv.env + + cmd_str = execenv.execenv_prefix(config, as_string=True) + assert cmd_str == ( + "VAR1=val1 VAR2=val2 " + "shifter --env=VAR1=val1 --env=VAR2=val2 " + "--volume=whatever:whatever " + "--image=legendexp/legend-base:latest " + ) + + +def test_execenv_pyexe(config): + cmd_str = execenv.execenv_pyexe(config, "dio-boe") + + assert cmd_str == ( + "VAR1=val1 VAR2=val2 " + "shifter --env=VAR1=val1 --env=VAR2=val2 " + "--volume=whatever:whatever " + 
"--image=legendexp/legend-base:latest " + ".snakemake/software/bin/dio-boe " + ) diff --git a/workflow/src/legenddataflow/__init__.py b/workflow/src/legenddataflow/__init__.py index 5392601..a8ba884 100644 --- a/workflow/src/legenddataflow/__init__.py +++ b/workflow/src/legenddataflow/__init__.py @@ -3,7 +3,6 @@ from .execenv import ( execenv_prefix, execenv_pyexe, - execenv_python, ) from .FileKey import ChannelProcKey, FileKey, ProcessingFileKey from .pars_loading import ParsCatalog @@ -22,7 +21,6 @@ "ProcessingFileKey", "execenv_prefix", "execenv_pyexe", - "execenv_python", "subst_vars", "subst_vars_in_snakemake_config", "unix_time", diff --git a/workflow/src/legenddataflow/execenv.py b/workflow/src/legenddataflow/execenv.py index a34e805..c11b372 100644 --- a/workflow/src/legenddataflow/execenv.py +++ b/workflow/src/legenddataflow/execenv.py @@ -7,6 +7,7 @@ import shutil import subprocess from pathlib import Path +from typing import Iterable, Mapping import colorlog import dbetto @@ -17,68 +18,105 @@ log = logging.getLogger(__name__) -def execenv_prefix(config, aslist=False): +def _execenv2str(cmd_expr: Iterable, cmd_env: Mapping) -> str: + return " ".join([f"{k}={v}" for k, v in cmd_env.items()]) + " " + " ".join(cmd_expr) + + +def apptainer_env_vars(cmdenv: Mapping) -> list[str]: + return [f"--env={var}={val}" for var, val in cmdenv.items()] + + +def docker_env_vars(cmdenv: Mapping) -> list[str]: + # same syntax + return apptainer_env_vars(cmdenv) + + +def shifter_env_vars(cmdenv: Mapping) -> list[str]: + # same syntax + return apptainer_env_vars(cmdenv) + + +def execenv_prefix( + config: AttrsDict, as_string: bool = True +) -> str | tuple[list, dict]: """Returns the software environment command prefix. For example: `apptainer run image.sif` + + Note + ---- + If `as_string` is True, a space is appended to the returned string. """ config = AttrsDict(config) + cmdline = [] + if "env" in config.execenv: + cmdenv = config.execenv.env + if "execenv" in config and "cmd" in config.execenv and "arg" in config.execenv: cmdline = shlex.split(config.execenv.cmd) - if "env" in config.execenv: - # FIXME: this is not portable, only works with Apptainer and Docker - cmdline += [f"--env={var}={val}" for var, val in config.execenv.env.items()] - cmdenv = {} + has_xdg = False xdg_runtime_dir = os.getenv("XDG_RUNTIME_DIR") if xdg_runtime_dir: - cmdenv["APPTAINER_BINDPATH"] = xdg_runtime_dir + has_xdg = True + + if "env" in config.execenv: + if any(exe in config.execenv.cmd for exe in ("apptainer", "singularity")): + cmdline += apptainer_env_vars(config.execenv.env) + if has_xdg: + cmdline += [f"--bind={xdg_runtime_dir}"] + + elif "docker" in config.execenv.cmd: + cmdline += docker_env_vars(config.execenv.env) + + elif "shifter" in config.execenv.cmd: + cmdline += shifter_env_vars(config.execenv.env) + + if ( + any(exe in config.execenv.cmd for exe in ("docker", "shifter")) + and has_xdg + ): + cmdline += [f"--volume={xdg_runtime_dir}:{xdg_runtime_dir}"] + # now we can add the arguments cmdline += shlex.split(config.execenv.arg) - else: - cmdenv = {} - cmdline = [] - if aslist: - return cmdline, cmdenv - return " ".join(cmdline), cmdenv + if as_string: + return _execenv2str(cmdline, cmdenv) + " " + + return cmdline, cmdenv -def execenv_python(config, aslist=False): +def execenv_pyexe( + config: AttrsDict, exename: str, as_string: bool = True +) -> str | tuple[list, dict]: """Returns the Python interpreter command. 
For example: `apptainer run image.sif python` + + Note + ---- + If `as_string` is True, a space is appended to the returned string. """ config = AttrsDict(config) - cmdline, cmdenv = execenv_prefix(config, aslist=True) - cmdline.append(f"{config.paths.install}/bin/python") - - if aslist: - return cmdline, cmdenv - return " ".join(cmdline), cmdenv + cmdline, cmdenv = execenv_prefix(config, as_string=False) + cmdline.append(f"{config.paths.install}/bin/{exename}") + if as_string: + return _execenv2str(cmdline, cmdenv) + " " -def execenv_pyexe(config, exename): - """Returns the command used to run a legend-dataflow executable for a Snakemake rule. - - For example: `apptainer run image.sif path/to/bindir/` - """ - cmdline, _ = execenv_prefix(config, aslist=True) - # NOTE: space after the executable name - cmdline.append(f"{config.paths.install}/bin/{exename} ") - - return " ".join(cmdline) + return cmdline, cmdenv def dataprod() -> None: - """dataprod's command-line interface for installing and loading the software in the data production environment. + """dataprod's CLI for installing and loading the software in the data production environment. .. code-block:: console $ dataprod --help - $ dataprod exec --help # help section for a specific sub-command + $ dataprod install --help # help section for a specific sub-command """ parser = argparse.ArgumentParser( @@ -139,9 +177,9 @@ def dataprod() -> None: def install(args) -> None: - """ - This function installs user software in the data production environment. - The software packages should be specified in the config.yaml file with the + """Installs user software in the data production environment. + + The software packages should be specified in the `config_file` with the format: ```yaml @@ -149,6 +187,12 @@ def install(args) -> None: - python_package_spec - ... ``` + + .. code-block:: console + + $ dataprod install config.yaml + $ dataprod install --editable config.yaml # install legend-dataflow in editable mode + $ dataprod install --remove config.yaml # remove install directory """ config_dict = AttrsDict(dbetto.utils.load_dict(args.config_file)) config_loc = Path(args.config_file).resolve().parent @@ -166,17 +210,12 @@ def install(args) -> None: shutil.rmtree(path_install) def _runcmd(cmd_expr, cmd_env, **kwargs): - msg = ( - "running: " - + " ".join([f"{k}={v}" for k, v in cmd_env.items()]) - + " " - + " ".join(cmd_expr), - ) + msg = "running: " + _execenv2str(cmd_expr, cmd_env) log.debug(msg) subprocess.run(cmd_expr, env=cmd_env, check=True, **kwargs) - cmd_prefix, cmd_env = execenv_prefix(config_dict, aslist=True) + cmd_prefix, cmd_env = execenv_prefix(config_dict, as_string=False) has_uv = False try: @@ -198,7 +237,7 @@ def _runcmd(cmd_expr, cmd_env, **kwargs): log.info(f"configuring virtual environment in {path_install}") _runcmd(cmd_expr, cmd_env) - python, cmd_env = execenv_python(config_dict, aslist=True) + python, cmd_env = execenv_pyexe(config_dict, "python", as_string=False) if not has_uv: cmd_expr = [ @@ -247,9 +286,7 @@ def _runcmd(cmd_expr, cmd_env, **kwargs): def cmdexec(args) -> None: - """ - This function loads the data production environment and executes a given command. 
- """ + """Load the data production environment and execute a given command.""" config_dict = AttrsDict(dbetto.utils.load_dict(args.config_file)) config_loc = Path(args.config_file).resolve().parent @@ -260,15 +297,10 @@ def cmdexec(args) -> None: ignore_missing=False, ) - cmd_prefix, cmd_env = execenv_prefix(config_dict, aslist=True) + cmd_prefix, cmd_env = execenv_prefix(config_dict, as_string=False) cmd_expr = [*cmd_prefix, *args.command] - msg = ( - "running: " - + " ".join([f"{k}={v}" for k, v in cmd_env.items()]) - + " " - + " ".join(cmd_expr), - ) + msg = "running: " + _execenv2str(cmd_expr, cmd_env) log.debug(msg) subprocess.run(cmd_expr, env=cmd_env, check=True) From 084ab10c74749ee9afea3393854d63856b688c50 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Wed, 5 Feb 2025 23:32:01 +0100 Subject: [PATCH 097/101] add another simple execenv test --- tests/test_execenv.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_execenv.py b/tests/test_execenv.py index 4aa354d..f12af17 100644 --- a/tests/test_execenv.py +++ b/tests/test_execenv.py @@ -78,6 +78,17 @@ def test_execenv_prefix(config): "--image=legendexp/legend-base:latest " ) + config = { + "execenv": { + "env": { + "VAR1": "val1", + "VAR2": "val2", + } + } + } + cmd_str = execenv.execenv_prefix(config, as_string=True) + assert cmd_str == "VAR1=val1 VAR2=val2 " + def test_execenv_pyexe(config): cmd_str = execenv.execenv_pyexe(config, "dio-boe") From 6ece274dec9498ba7b0d985723aa969900c2918e Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 6 Feb 2025 12:03:35 +0100 Subject: [PATCH 098/101] round of fixes --- workflow/Snakefile | 4 ++-- workflow/Snakefile-build-raw | 19 ++++++++++--------- workflow/profiles/lngs-build-raw/config.yaml | 2 -- workflow/rules/main.smk | 4 ++-- .../legenddataflow/scripts/complete_run.py | 5 ++--- .../legenddataflow/scripts/write_filelist.py | 3 ++- 6 files changed, 18 insertions(+), 19 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index db7e3c3..861499f 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -80,7 +80,7 @@ onstart: # Make sure some packages are initialized before we begin to avoid race conditions for pkg in ["dspeed", "lgdo", "matplotlib"]: - shell('{swenv} python3 -B -c "import ' + pkg + '"') + shell(execenv.execenv_pyexe(config, "python") + f" -c 'import {pkg}'") # Log parameter catalogs in validity.jsonl files hit_par_cat_file = Path(utils.pars_path(config)) / "hit" / "validity.yaml" @@ -169,4 +169,4 @@ rule gen_filelist: output: temp(Path(utils.filelist_path(config)) / "{label}-{tier}.filelist"), script: - "scripts/write_filelist.py" + "src/legenddataflow/scripts/write_filelist.py" diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw index 6346978..ed48960 100644 --- a/workflow/Snakefile-build-raw +++ b/workflow/Snakefile-build-raw @@ -20,6 +20,9 @@ check_in_cycle = True swenv = execenv.execenv_prefix(config) meta_path = utils.metadata_path(config) det_status = utils.det_status_path(config) +configs = utils.config_path(config) +chan_maps = utils.chan_map_path(config) +meta = utils.metadata_path(config) time = datetime.now().strftime("%Y%m%dT%H%M%SZ") @@ -48,11 +51,10 @@ include: "rules/blinding_check.smk" onstart: - print("INFO: starting workflow") - - # Make sure some packages are initialized before we begin to avoid race conditions - shell('{swenv} python3 -B -c "import daq2lh5 "') + print("INFO: initializing workflow") + # Make sure some packages are initialized before we send jobs to avoid race 
conditions + shell(execenv.execenv_pyexe(config, "python") + " -c 'import daq2lh5'") raw_par_cat_file = Path(utils.pars_path(config)) / "raw" / "validity.yaml" if raw_par_cat_file.is_file(): @@ -61,13 +63,12 @@ onstart: Path(raw_par_cat_file).parent.mkdir(parents=True, exist_ok=True) ParsKeyResolve.write_to_yaml(raw_par_catalog, raw_par_cat_file) except NameError: - print("No raw parameter catalog found") + print("WARNING: no raw parameter catalog found") onsuccess: - print("Workflow finished, no error") - shell("rm *.gen || true") - shell(f"rm {utils.filelist_path(config)}/* || true") + shell("rm -f *.gen") + shell(f"rm -rf {utils.filelist_path(config)}/*") rule gen_filelist: @@ -82,7 +83,7 @@ rule gen_filelist: output: temp(Path(utils.filelist_path(config)) / "{label}-{tier}.filelist"), script: - "scripts/write_filelist.py" + "src/legenddataflow/scripts/write_filelist.py" rule sort_data: diff --git a/workflow/profiles/lngs-build-raw/config.yaml b/workflow/profiles/lngs-build-raw/config.yaml index 73b5cb5..14be322 100644 --- a/workflow/profiles/lngs-build-raw/config.yaml +++ b/workflow/profiles/lngs-build-raw/config.yaml @@ -1,6 +1,4 @@ cores: 30 -restart-times: 2 -max-jobs-per-second: 1 resources: - mem_swap=3500 configfile: config-lngs.yaml diff --git a/workflow/rules/main.smk b/workflow/rules/main.smk index a78784d..d557493 100644 --- a/workflow/rules/main.smk +++ b/workflow/rules/main.smk @@ -47,7 +47,7 @@ rule autogen_output: valid_keys_path=os.path.join(pars_path(config), "valid_keys"), filedb_path=os.path.join(pars_path(config), "filedb"), setup=lambda wildcards: config, - basedir=basedir, + basedir=workflow.basedir, threads: min(workflow.cores, 64) script: - "../scripts/complete_run.py" + "../src/legenddataflow/scripts/complete_run.py" diff --git a/workflow/src/legenddataflow/scripts/complete_run.py b/workflow/src/legenddataflow/scripts/complete_run.py index eff7a90..86dc28f 100644 --- a/workflow/src/legenddataflow/scripts/complete_run.py +++ b/workflow/src/legenddataflow/scripts/complete_run.py @@ -7,9 +7,8 @@ import time from pathlib import Path -from .. import patterns -from .. 
import utils as ut -from ..FileKey import FileKey +from legenddataflow import FileKey, patterns +from legenddataflow import utils as ut print("INFO: dataflow ran successfully, now few final checks and scripts") diff --git a/workflow/src/legenddataflow/scripts/write_filelist.py b/workflow/src/legenddataflow/scripts/write_filelist.py index f27c2ad..edeba98 100644 --- a/workflow/src/legenddataflow/scripts/write_filelist.py +++ b/workflow/src/legenddataflow/scripts/write_filelist.py @@ -1,7 +1,8 @@ # ruff: noqa: F821, T201 -# from snakemake.script import snakemake # snakemake > 8.16 from pathlib import Path +from snakemake.script import snakemake # snakemake > 8.16 + print(f"INFO: found {len(snakemake.input)} files") if len(snakemake.input) == 0: print( From 69660270e46d91acc2604e56f2c446bcb2a6ab26 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Thu, 6 Feb 2025 15:01:18 +0100 Subject: [PATCH 099/101] fixes for complete_run.py --- pyproject.toml | 2 +- workflow/Snakefile-build-raw | 2 +- .../legenddataflow/scripts/complete_run.py | 22 +++++++------------ 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3aae00f..ebe2550 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,7 +86,7 @@ docs = [ dataprod = "legenddataflow.execenv:dataprod" create-chankeylist = "legenddataflow.scripts.create_chankeylist:create_chankeylist" merge-channels = "legenddataflow.scripts.merge_channels:merge_channels" -build-filedb = "legenddataflow.scripts.build_filedb:build_filedb" +build-filedb = "legenddataflow.scripts.filedb:build_filedb" build-tier-dsp = "legenddataflow.scripts.tier.dsp:build_tier_dsp" build-tier-evt = "legenddataflow.scripts.tier.evt:build_tier_evt" build-tier-hit = "legenddataflow.scripts.tier.hit:build_tier_hit" diff --git a/workflow/Snakefile-build-raw b/workflow/Snakefile-build-raw index ed48960..c1464cd 100644 --- a/workflow/Snakefile-build-raw +++ b/workflow/Snakefile-build-raw @@ -54,7 +54,7 @@ onstart: print("INFO: initializing workflow") # Make sure some packages are initialized before we send jobs to avoid race conditions - shell(execenv.execenv_pyexe(config, "python") + " -c 'import daq2lh5'") + shell(execenv.execenv_pyexe(config, "python") + " -c 'import daq2lh5, matplotlib'") raw_par_cat_file = Path(utils.pars_path(config)) / "raw" / "validity.yaml" if raw_par_cat_file.is_file(): diff --git a/workflow/src/legenddataflow/scripts/complete_run.py b/workflow/src/legenddataflow/scripts/complete_run.py index 86dc28f..ea4a2b1 100644 --- a/workflow/src/legenddataflow/scripts/complete_run.py +++ b/workflow/src/legenddataflow/scripts/complete_run.py @@ -9,6 +9,7 @@ from legenddataflow import FileKey, patterns from legenddataflow import utils as ut +from legenddataflow.execenv import _execenv2str, execenv_pyexe print("INFO: dataflow ran successfully, now few final checks and scripts") @@ -199,14 +200,14 @@ def build_file_dbs(gen_tier_path, outdir): Path(ut.tmp_log_path(snakemake.params.setup)) / outfile.with_suffix(".log").name ) + print(f"INFO: ......building {outfile}") + pre_cmdline, cmdenv = execenv_pyexe( + snakemake.params.setup, "build-filedb", as_string=False + ) cmdline = [ - *ut.runcmd(snakemake.params.setup, aslist=True), - "--", - "python3", - "-B", - f"{snakemake.params.basedir}/scripts/build_fdb.py", + *pre_cmdline, "--scan-path", spec, "--output", @@ -220,10 +221,8 @@ def build_file_dbs(gen_tier_path, outdir): if speck[0] == "phy": cmdline += ["--assume-nonsparse"] - cmdenv = {} - # TODO: forward stdout to log file - 
processes.add(subprocess.Popen(cmdline)) + processes.add(subprocess.Popen(cmdline, env=cmdenv)) if len(processes) >= snakemake.threads: os.wait() @@ -235,12 +234,7 @@ def build_file_dbs(gen_tier_path, outdir): for p in processes: if p.returncode != 0: - _cmdline = ( - " ".join([f"{k}={v}" for k, v in cmdenv.items()]) - + " " - + " ".join(p.args) - ) - msg = f"at least one FileDB building thread failed: {_cmdline}" + msg = f"at least one FileDB building thread failed: {_execenv2str(p.args, cmdenv)}" raise RuntimeError(msg) toc = time.time() From aff2370c7f4c9b16e270022d032f64db80f6244f Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 6 Feb 2025 15:33:27 +0100 Subject: [PATCH 100/101] move common code into own scripts --- .../scripts/par/geds/dsp/dplms.py | 9 +--- .../scripts/par/geds/dsp/eopt.py | 6 +-- .../scripts/par/geds/dsp/evtsel.py | 32 ++++++-------- .../scripts/par/geds/dsp/tau.py | 32 ++++++-------- .../scripts/par/geds/hit/aoe.py | 33 +++++---------- .../scripts/par/geds/hit/ecal.py | 32 ++++++-------- .../legenddataflow/scripts/par/geds/hit/lq.py | 33 +++++---------- .../legenddataflow/scripts/par/geds/hit/qc.py | 29 ++++--------- .../scripts/par/geds/pht/aoe.py | 37 ++++++---------- .../scripts/par/geds/pht/ecal_part.py | 36 ++++++---------- .../scripts/par/geds/pht/fast.py | 39 +++++++---------- .../legenddataflow/scripts/par/geds/pht/lq.py | 42 +++++++------------ .../legenddataflow/scripts/par/geds/pht/qc.py | 36 +++++----------- .../scripts/par/geds/pht/qc_phy.py | 6 +-- .../scripts/par/geds/psp/average.py | 7 +--- .../scripts/par/geds/raw/blindcal.py | 7 ++-- .../scripts/par/geds/raw/blindcheck.py | 8 ++-- .../scripts/par/geds/tcm/pulser.py | 6 +-- .../legenddataflow/scripts/pulser_removal.py | 30 +++++++++++++ .../src/legenddataflow/scripts/table_name.py | 7 ++++ 20 files changed, 187 insertions(+), 280 deletions(-) create mode 100644 workflow/src/legenddataflow/scripts/pulser_removal.py create mode 100644 workflow/src/legenddataflow/scripts/table_name.py diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py index 16343dc..dabfb21 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/dplms.py @@ -1,5 +1,4 @@ import argparse -import logging import pickle as pkl import time from pathlib import Path @@ -8,11 +7,11 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata from lgdo import Array, Table from pygama.pargen.dplms_ge_dict import dplms_ge_dict from ....log import build_log +from ...table_name import get_table_name def par_geds_dsp_dplms() -> None: @@ -40,13 +39,9 @@ def par_geds_dsp_dplms() -> None: config_dict = configs["snakemake_rules"]["pars_dsp_dplms"] log = build_log(config_dict, args.log) - - log = logging.getLogger(__name__) sto = lh5.LH5Store() - meta = LegendMetadata(path=args.metadata) - channel_dict = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) configs = TextDB(args.configs).on(args.timestamp, system=args.datatype) dsp_config = config_dict["inputs"]["proc_chain"][args.channel] diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py index 6376ed5..edd215b 100644 --- 
a/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/eopt.py @@ -11,7 +11,6 @@ from dbetto import TextDB from dbetto.catalog import Props from dspeed.units import unit_registry as ureg -from legendmeta import LegendMetadata from pygama.math.distributions import hpge_peak from pygama.pargen.dsp_optimize import ( BayesianOptimizer, @@ -20,6 +19,7 @@ ) from ....log import build_log +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -59,9 +59,7 @@ def par_geds_dsp_eopt() -> None: sto = lh5.LH5Store() t0 = time.time() - meta = LegendMetadata(path=args.metadata) - channel_dict = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) dsp_config = config_dict["inputs"]["processing_chain"][args.channel] opt_json = config_dict["inputs"]["optimiser_config"][args.channel] diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py index afd4a0b..245cbb2 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py +++ b/workflow/src/legenddataflow/scripts/par/geds/dsp/evtsel.py @@ -12,11 +12,12 @@ import pygama.pargen.energy_cal as pgc from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata -from pygama.pargen.data_cleaning import generate_cuts, get_keys, get_tcm_pulser_ids +from pygama.pargen.data_cleaning import generate_cuts, get_keys from pygama.pargen.dsp_optimize import run_one_dsp from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -114,9 +115,7 @@ def par_geds_dsp_evtsel() -> None: sto = lh5.LH5Store() t0 = time.time() - meta = LegendMetadata(path=args.metadata) - channel_dict = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) dsp_config = config_dict["inputs"]["processing_chain"][args.channel] peak_json = config_dict["inputs"]["peak_config"][args.channel] @@ -135,21 +134,14 @@ def par_geds_dsp_evtsel() -> None: files = f.read().splitlines() raw_files = sorted(files) - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, peak_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_file, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=peak_dict.get( + "pulser_multiplicity_threshold" + ), + ) raw_dict = Props.read_from(args.raw_cal)[channel]["pars"]["operations"] diff --git a/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py b/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py index a86e531..c966495 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py +++ 
b/workflow/src/legenddataflow/scripts/par/geds/dsp/tau.py @@ -6,12 +6,13 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata -from pygama.pargen.data_cleaning import get_cut_indexes, get_tcm_pulser_ids +from pygama.pargen.data_cleaning import get_cut_indexes from pygama.pargen.dsp_optimize import run_one_dsp from pygama.pargen.extract_tau import ExtractTau from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name def par_geds_dsp_tau() -> None: @@ -44,9 +45,7 @@ def par_geds_dsp_tau() -> None: log = build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - channel_dict = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) channel_dict = config_dict["inputs"]["processing_chain"][args.channel] kwarg_dict = config_dict["inputs"]["tau_config"][args.channel] @@ -66,21 +65,14 @@ def par_geds_dsp_tau() -> None: else: input_file = args.raw_files - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_file, + tcm_filelist=args.tcm_files, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get( + "pulser_multiplicity_threshold" + ), + ) data = sto.read( f"{channel}/raw", diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py b/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py index 74ece8e..df2719d 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/aoe.py @@ -8,14 +8,14 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak -from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from ....convert_np import convert_dict_np_to_float from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -80,9 +80,7 @@ def par_geds_hit_aoe() -> None: log = build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - channel_dict = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) channel_dict = config_dict["inputs"]["aoecal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) @@ -157,23 +155,14 @@ def eres_func(x): return_selection_mask=True, ) - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with 
Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_file, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get( + "pulser_multiplicity_threshold" + ), + ) data["is_pulser"] = mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py b/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py index c763433..3a4e30a 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/ecal.py @@ -18,13 +18,15 @@ from legendmeta import LegendMetadata from matplotlib.colors import LogNorm from pygama.math.distributions import nb_poly -from pygama.pargen.data_cleaning import get_mode_stdev, get_tcm_pulser_ids +from pygama.pargen.data_cleaning import get_mode_stdev from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from scipy.stats import binned_statistic from ....convert_np import convert_dict_np_to_float from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name mpl.use("agg") sto = lh5.LH5Store() @@ -478,10 +480,11 @@ def par_geds_hit_ecal() -> None: build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - chmap = meta.channelmap(args.timestamp) - channel = f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) + chmap = LegendMetadata(args.metadata).channelmap( + args.timestamp, system=args.datatype + ) det_status = chmap[args.channel]["analysis"]["usability"] if args.in_hit_dict: @@ -529,21 +532,12 @@ def par_geds_hit_ecal() -> None: cal_energy_param="trapTmax", ) - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_file, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get("pulser_multiplicity_threshold"), + ) data["is_pulser"] = mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py b/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py index b4dc3f2..6bacb36 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/lq.py @@ -8,16 +8,16 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 -from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data from ....convert_np import convert_dict_np_to_float from ....log import build_log 
+from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -79,9 +79,7 @@ def par_geds_hit_lq() -> None: log = build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - channel_dict = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) @@ -135,23 +133,14 @@ def eres_func(x): return_selection_mask=True, ) - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict.pop("pulser_multiplicity_threshold") - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_file, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get( + "pulser_multiplicity_threshold" + ), + ) data["is_pulser"] = mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py b/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py index 33934c4..0f9387c 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py +++ b/workflow/src/legenddataflow/scripts/par/geds/hit/qc.py @@ -10,17 +10,17 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( generate_cut_classifiers, get_keys, - get_tcm_pulser_ids, ) from pygama.pargen.utils import load_data from ....convert_np import convert_dict_np_to_float from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -62,9 +62,7 @@ def par_geds_hit_qc() -> None: log = build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - chmap = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) # get metadata dictionary channel_dict = config_dict["inputs"]["qc_config"][args.channel] @@ -198,21 +196,12 @@ def par_geds_hit_qc() -> None: cal_energy_param="trapTmax", ) - if args.pulser_file: - pulser_dict = Props.read_from(args.pulser_file) - mask = np.array(pulser_dict["mask"]) - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_file, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get("pulser_multiplicity_threshold"), + ) data["is_pulser"] = 
mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py b/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py index 76383ef..fd21aa3 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/aoe.py @@ -14,11 +14,12 @@ from legendmeta import LegendMetadata from pygama.pargen.AoE_cal import * # noqa: F403 from pygama.pargen.AoE_cal import CalAoE, Pol1, SigmaFit, aoe_peak -from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from ....FileKey import ChannelProcKey, ProcessingFileKey from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -264,9 +265,7 @@ def par_geds_pht_aoe() -> None: build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - chmap = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) channel_dict = config_dict["inputs"]["par_pht_aoecal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) @@ -345,26 +344,16 @@ def par_geds_pht_aoe() -> None: return_selection_mask=True, ) - if args.pulser_files: - mask = np.array([], dtype=bool) - for file in args.pulser_files: - pulser_dict = Props.read_from(file) - pulser_mask = np.array(pulser_dict["mask"]) - mask = np.append(mask, pulser_mask) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_files, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get( + "pulser_multiplicity_threshold" + ), + ) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") data["is_pulser"] = mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py b/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py index 6d9babf..ccee7c1 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/ecal_part.py @@ -15,12 +15,13 @@ from dbetto.catalog import Props from legendmeta import LegendMetadata from pygama.math.distributions import nb_poly -from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.energy_cal import FWHMLinear, FWHMQuadratic, HPGeCalibration from pygama.pargen.utils import load_data from ....FileKey import ChannelProcKey, ProcessingFileKey from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -464,9 +465,8 @@ def calibrate_partition( log = build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - chmap = meta.channelmap(args.timestamp, system=args.datatype) - channel = 
f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) + chmap = LegendMetadata(path=args.metadata).on(args.timestamp, system=args.datatype) cal_dict = {} results_dicts = {} @@ -529,26 +529,14 @@ def calibrate_partition( cal_energy_param=kwarg_dict["energy_params"][0], ) - if args.pulser_files: - mask = np.array([], dtype=bool) - for file in args.pulser_files: - pulser_dict = Props.read_from(file) - pulser_mask = np.array(pulser_dict["mask"]) - mask = np.append(mask, pulser_mask) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_files, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get("pulser_multiplicity_threshold"), + ) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") data["is_pulser"] = mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py b/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py index c3089e1..c1ac946 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/fast.py @@ -11,7 +11,6 @@ from dbetto import TextDB from dbetto.catalog import Props from legendmeta import LegendMetadata -from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.utils import load_data from workflow.src.legenddataflow.scripts.par.geds.pht.aoe import run_aoe_calibration from workflow.src.legenddataflow.scripts.par.geds.pht.lq import run_lq_calibration @@ -19,6 +18,8 @@ from ....FileKey import ChannelProcKey, ProcessingFileKey from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) warnings.filterwarnings(action="ignore", category=np.RankWarning) @@ -85,9 +86,10 @@ def par_geds_pht_fast() -> None: build_log(config_dict["pars_pht_partcal"], args.log) - meta = LegendMetadata(path=args.metadata) - chmap = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) + chmap = LegendMetadata(args.metadata).channelmap( + args.timestamp, system=args.datatype + ) cal_dict = {} results_dicts = {} @@ -186,27 +188,14 @@ def par_geds_pht_fast() -> None: cal_energy_param=kwarg_dict["energy_params"][0], ) - if args.pulser_files: - mask = np.array([], dtype=bool) - for file in args.pulser_files: - with Path(file).open() as f: - pulser_dict = json.load(f) - pulser_mask = np.array(pulser_dict["mask"]) - mask = np.append(mask, pulser_mask) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, 
kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_files, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get("pulser_multiplicity_threshold"), + ) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") data["is_pulser"] = mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py b/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py index f46914c..12cd06d 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/lq.py @@ -14,13 +14,14 @@ from legendmeta import LegendMetadata from pygama.math.distributions import gaussian from pygama.pargen.AoE_cal import * # noqa: F403 -from pygama.pargen.data_cleaning import get_tcm_pulser_ids from pygama.pargen.lq_cal import * # noqa: F403 from pygama.pargen.lq_cal import LQCal from pygama.pargen.utils import load_data -from ..FileKey import ChannelProcKey, ProcessingFileKey -from ..log import build_log +from ....FileKey import ChannelProcKey, ProcessingFileKey +from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -281,9 +282,7 @@ def eres_func(x): log = build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - chmap = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) channel_dict = config_dict["inputs"]["lqcal_config"][args.channel] kwarg_dict = Props.read_from(channel_dict) @@ -353,27 +352,16 @@ def eres_func(x): return_selection_mask=True, ) - if args.pulser_files: - mask = np.array([], dtype=bool) - for file in args.pulser_files: - with Path(file).open() as f: - pulser_dict = json.load(f) - pulser_mask = np.array(pulser_dict["mask"]) - mask = np.append(mask, pulser_mask) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + mask = get_pulser_mask( + pulser_file=args.pulser_files, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get( + "pulser_multiplicity_threshold" + ), + ) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") data["is_pulser"] = mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py b/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py index c6ac3df..89c9f4d 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/qc.py @@ -10,17 +10,17 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( generate_cut_classifiers, get_keys, - get_tcm_pulser_ids, ) from pygama.pargen.utils import 
load_data from ....convert_np import convert_dict_np_to_float from ....log import build_log +from ...pulser_removal import get_pulser_mask +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -64,9 +64,7 @@ log = build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - chmap = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) # get metadata dictionary channel_dict = config_dict["inputs"]["qc_config"][args.channel] @@ -220,26 +218,14 @@ cal_energy_param="trapTmax", ) - if args.pulser_files: - total_mask = np.array([], dtype=bool) - for file in args.pulser_files: - pulser_dict = Props.read_from(file) - pulser_mask = np.array(pulser_dict["mask"]) - total_mask = np.append(total_mask, pulser_mask) - if "pulser_multiplicity_threshold" in kwarg_dict: - kwarg_dict.pop("pulser_multiplicity_threshold") - - elif args.tcm_filelist: - # get pulser mask from tcm files - with Path(args.tcm_filelist).open() as f: - tcm_files = f.read().splitlines() - tcm_files = sorted(np.unique(tcm_files)) - ids, total_mask = get_tcm_pulser_ids( - tcm_files, channel, kwarg_dict["pulser_multiplicity_threshold"] - ) - else: - msg = "No pulser file or tcm filelist provided" - raise ValueError(msg) + total_mask = get_pulser_mask( + pulser_file=args.pulser_files, + tcm_filelist=args.tcm_filelist, + channel=channel, + pulser_multiplicity_threshold=kwarg_dict.get("pulser_multiplicity_threshold"), + ) + if "pulser_multiplicity_threshold" in kwarg_dict: + kwarg_dict.pop("pulser_multiplicity_threshold") data["is_pulser"] = total_mask[threshold_mask] diff --git a/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py b/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py index 9007ad7..fa11226 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py +++ b/workflow/src/legenddataflow/scripts/par/geds/pht/qc_phy.py @@ -11,7 +11,6 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata from lgdo.lh5 import ls from pygama.pargen.data_cleaning import ( generate_cut_classifiers, @@ -20,6 +19,7 @@ from ....convert_np import convert_dict_np_to_float from ....log import build_log +from ...table_name import get_table_name warnings.filterwarnings(action="ignore", category=RuntimeWarning) @@ -52,9 +52,7 @@ log = build_log(config_dict, args.log) - meta = LegendMetadata(path=args.metadata) - chmap = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) # get metadata dictionary channel_dict = config_dict["qc_config"][args.channel] diff --git a/workflow/src/legenddataflow/scripts/par/geds/psp/average.py b/workflow/src/legenddataflow/scripts/par/geds/psp/average.py index 3ba1423..99bb28a 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/psp/average.py +++ b/workflow/src/legenddataflow/scripts/par/geds/psp/average.py @@ -7,8 +7,7 @@ import matplotlib.dates as mdates import matplotlib.pyplot as plt import numpy as np -from dbetto.catalog import Props -from legendmeta import LegendMetadata +from dbetto.catalog import Props, TextDB from ....FileKey import ChannelProcKey @@ -44,9 +43,7 @@ def par_geds_psp_average() -> None: argparser.add_argument("--channel", help="Channel", type=str, required=True) 
args = argparser.parse_args() - configs = LegendMetadata(args.configs, lazy=True).on( - args.timestamp, system=args.datatype - ) + configs = TextDB(args.configs, lazy=True).on(args.timestamp, system=args.datatype) merge_config = Props.read_from( configs["snakemake_rules"]["pars_psp"]["inputs"]["psp_config"][args.channel] ) diff --git a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py index eeaaf2b..e1d8b30 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcal.py @@ -13,10 +13,11 @@ import matplotlib.pyplot as plt import numpy as np from dbetto.catalog import Props -from legendmeta import LegendMetadata from lgdo import lh5 from pygama.pargen.energy_cal import HPGeCalibration +from ...table_name import get_table_name + mpl.use("agg") @@ -45,9 +46,7 @@ def par_geds_raw_blindcal() -> None: logging.getLogger("matplotlib").setLevel(logging.INFO) log = logging.getLogger(__name__) - meta = LegendMetadata(path=args.meta) - channel_dict = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{channel_dict[args.channel].daq.rawid:07}" + channel = get_table_name(args.meta, args.timestamp, args.datatype, args.channel) # peaks to search for peaks_keV = np.array( diff --git a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py index 5f60c54..165edb4 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py +++ b/workflow/src/legenddataflow/scripts/par/geds/raw/blindcheck.py @@ -22,6 +22,7 @@ from pygama.pargen.energy_cal import get_i_local_maxima from ....log import build_log +from ...table_name import get_table_name mpl.use("Agg") @@ -48,10 +49,9 @@ def par_geds_raw_blindcheck() -> None: log = build_log(config_dict, args.log) # get the usability status for this channel - chmap = LegendMetadata(args.metadata, lazy=True).channelmap( - args.timestamp, system=args.datatype - ) - channel = f"ch{chmap[args.channel].daq.rawid:07}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) + + chmap = LegendMetadata(args.meta).channelmap(args.timestamp, system=args.datatype) det_status = chmap[args.channel]["analysis"]["is_blinded"] # read in calibration curve for this channel diff --git a/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py b/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py index 4c75d62..1b0cceb 100644 --- a/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py +++ b/workflow/src/legenddataflow/scripts/par/geds/tcm/pulser.py @@ -4,10 +4,10 @@ import numpy as np from dbetto import TextDB from dbetto.catalog import Props -from legendmeta import LegendMetadata from pygama.pargen.data_cleaning import get_tcm_pulser_ids from ....log import build_log +from ...table_name import get_table_name def par_geds_tcm_pulser() -> None: @@ -35,9 +35,7 @@ def par_geds_tcm_pulser() -> None: kwarg_dict = config_dict["inputs"]["pulser_config"] kwarg_dict = Props.read_from(kwarg_dict) - meta = LegendMetadata(path=args.metadata) - channel_dict = meta.channelmap(args.timestamp, system=args.datatype) - channel = f"ch{channel_dict[args.channel].daq.rawid}" + channel = get_table_name(args.metadata, args.timestamp, args.datatype, args.channel) if ( isinstance(args.tcm_files, list) diff --git a/workflow/src/legenddataflow/scripts/pulser_removal.py b/workflow/src/legenddataflow/scripts/pulser_removal.py new 
file mode 100644 index 0000000..40b3045 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/pulser_removal.py @@ -0,0 +1,30 @@ +from pathlib import Path + +import numpy as np +from dbetto.catalog import Props +from pygama.pargen.data_cleaning import get_tcm_pulser_ids + + +def get_pulser_mask( + pulser_file=None, tcm_filelist=None, channel=None, pulser_multiplicity_threshold=10 +): + if pulser_file is not None: + if not isinstance(pulser_file, list): + pulser_file = [pulser_file] + mask = np.array([], dtype=bool) + for file in pulser_file: + pulser_dict = Props.read_from(file) + pulser_mask = np.array(pulser_dict["mask"]) + mask = np.append(mask, pulser_mask) + + elif tcm_filelist is not None: + # get pulser mask from tcm files + with Path(tcm_filelist).open() as f: + tcm_files = f.read().splitlines() + tcm_files = sorted(np.unique(tcm_files)) + _, mask = get_tcm_pulser_ids(tcm_files, channel, pulser_multiplicity_threshold) + else: + msg = "No pulser file or tcm filelist provided" + raise ValueError(msg) + + return mask diff --git a/workflow/src/legenddataflow/scripts/table_name.py b/workflow/src/legenddataflow/scripts/table_name.py new file mode 100644 index 0000000..653bbb1 --- /dev/null +++ b/workflow/src/legenddataflow/scripts/table_name.py @@ -0,0 +1,7 @@ +from legendmeta import LegendMetadata + + +def get_table_name(metadata_path, timestamp, datatype, detector): + meta = LegendMetadata(path=metadata_path) + channel_dict = meta.channelmap(timestamp, system=datatype) + return f"ch{channel_dict[detector].daq.rawid:07}" From cbb29c85ca51aa3e049cd2127d2929dd62d1299e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 6 Feb 2025 16:16:19 +0100 Subject: [PATCH 101/101] make table name lazy --- workflow/src/legenddataflow/scripts/table_name.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/src/legenddataflow/scripts/table_name.py b/workflow/src/legenddataflow/scripts/table_name.py index 653bbb1..935e811 100644 --- a/workflow/src/legenddataflow/scripts/table_name.py +++ b/workflow/src/legenddataflow/scripts/table_name.py @@ -2,6 +2,6 @@ def get_table_name(metadata_path, timestamp, datatype, detector): - meta = LegendMetadata(path=metadata_path) + meta = LegendMetadata(path=metadata_path, lazy=True) channel_dict = meta.channelmap(timestamp, system=datatype) return f"ch{channel_dict[detector].daq.rawid:07}"