From a9edf30376e6cf1e471ead29899b23afc58c68a1 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Mon, 19 Aug 2024 10:46:56 +0200
Subject: [PATCH 01/50] turn the scripts dir into a module, to enable importing
 code from the repo rather than the installed package

---
 scripts/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 scripts/__init__.py

diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 00000000..e69de29b

From 6a998b10a8bbc52061a32d3004e840592e5e55d4 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Mon, 19 Aug 2024 10:47:06 +0200
Subject: [PATCH 02/50] use module import

---
 scripts/zika_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/zika_methods.py b/scripts/zika_methods.py
index a813bd47..8a90548e 100644
--- a/scripts/zika_methods.py
+++ b/scripts/zika_methods.py
@@ -10,9 +10,9 @@
 
 import numpy as np
 import pandas as pd
-import zika_utils
 
 from epp_utils.udf_tools import is_filled
+from scripts import zika_utils
 
 
 def pool_fixed_vol(

From a55379b86b1f51ad5d0ec02bc1f92075a51d2d5e Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Mon, 19 Aug 2024 10:52:18 +0200
Subject: [PATCH 03/50] modularize and nest "calc_from_args_utils" and
 "epp_utils"

---
 .../calc_from_args_utils}/__init__.py                         | 0
 .../calc_from_args_utils}/calculation_methods.py              | 4 ++--
 .../calc_from_args_utils}/udf_arg_methods.py                  | 2 +-
 {epp_utils => scilifelab_epps/epp_utils}/__init__.py          | 0
 {epp_utils => scilifelab_epps/epp_utils}/formula.py           | 0
 {epp_utils => scilifelab_epps/epp_utils}/udf_tools.py         | 0
 scripts/calc_from_args.py                                     | 2 +-
 scripts/generate_minknow_samplesheet.py                       | 2 +-
 scripts/log_udfs.py                                           | 2 +-
 scripts/molar_concentration.py                                | 2 +-
 scripts/ont_calc_volumes.py                                   | 2 +-
 scripts/ont_pool.py                                           | 2 +-
 scripts/ont_sync_to_db.py                                     | 2 +-
 scripts/ont_update_amount.py                                  | 2 +-
 scripts/parse_anglerfish_results.py                           | 2 +-
 scripts/parse_ba_results.py                                   | 2 +-
 scripts/qc_amount_calculation.py                              | 2 +-
 scripts/zika_methods.py                                       | 2 +-
 scripts/zika_utils.py                                         | 2 +-
 19 files changed, 16 insertions(+), 16 deletions(-)
 rename {calc_from_args_utils => scilifelab_epps/calc_from_args_utils}/__init__.py (100%)
 rename {calc_from_args_utils => scilifelab_epps/calc_from_args_utils}/calculation_methods.py (99%)
 rename {calc_from_args_utils => scilifelab_epps/calc_from_args_utils}/udf_arg_methods.py (98%)
 rename {epp_utils => scilifelab_epps/epp_utils}/__init__.py (100%)
 rename {epp_utils => scilifelab_epps/epp_utils}/formula.py (100%)
 rename {epp_utils => scilifelab_epps/epp_utils}/udf_tools.py (100%)

diff --git a/calc_from_args_utils/__init__.py b/scilifelab_epps/calc_from_args_utils/__init__.py
similarity index 100%
rename from calc_from_args_utils/__init__.py
rename to scilifelab_epps/calc_from_args_utils/__init__.py
diff --git a/calc_from_args_utils/calculation_methods.py b/scilifelab_epps/calc_from_args_utils/calculation_methods.py
similarity index 99%
rename from calc_from_args_utils/calculation_methods.py
rename to scilifelab_epps/calc_from_args_utils/calculation_methods.py
index 5cb33f28..7e7abfc3 100644
--- a/calc_from_args_utils/calculation_methods.py
+++ b/scilifelab_epps/calc_from_args_utils/calculation_methods.py
@@ -7,12 +7,12 @@
 import tabulate
 from genologics.entities import Process
 
-from calc_from_args_utils.udf_arg_methods import (
+from scilifelab_epps.calc_from_args_utils.udf_arg_methods import (
     fetch_from_arg,
     get_UDF_source,
     get_UDF_source_name,
 )
-from epp_utils import formula, udf_tools
+from scilifelab_epps.epp_utils import formula, udf_tools
 
 DESC = """This file contains the method functions for a UDF-agnostic script."""
 
diff --git a/calc_from_args_utils/udf_arg_methods.py b/scilifelab_epps/calc_from_args_utils/udf_arg_methods.py
similarity index 98%
rename from calc_from_args_utils/udf_arg_methods.py
rename to scilifelab_epps/calc_from_args_utils/udf_arg_methods.py
index 3f5a56f4..a321025f 100644
--- a/calc_from_args_utils/udf_arg_methods.py
+++ b/scilifelab_epps/calc_from_args_utils/udf_arg_methods.py
@@ -5,7 +5,7 @@
 import yaml
 from genologics.entities import Artifact, Process
 
-from epp_utils import udf_tools
+from scilifelab_epps.epp_utils import udf_tools
 
 
 def fetch_from_arg(
diff --git a/epp_utils/__init__.py b/scilifelab_epps/epp_utils/__init__.py
similarity index 100%
rename from epp_utils/__init__.py
rename to scilifelab_epps/epp_utils/__init__.py
diff --git a/epp_utils/formula.py b/scilifelab_epps/epp_utils/formula.py
similarity index 100%
rename from epp_utils/formula.py
rename to scilifelab_epps/epp_utils/formula.py
diff --git a/epp_utils/udf_tools.py b/scilifelab_epps/epp_utils/udf_tools.py
similarity index 100%
rename from epp_utils/udf_tools.py
rename to scilifelab_epps/epp_utils/udf_tools.py
diff --git a/scripts/calc_from_args.py b/scripts/calc_from_args.py
index b8c398d4..1f6e48b1 100644
--- a/scripts/calc_from_args.py
+++ b/scripts/calc_from_args.py
@@ -9,7 +9,7 @@
 from genologics.entities import Process
 from genologics.lims import Lims
 
-from calc_from_args_utils import calculation_methods
+from scilifelab_epps.calc_from_args_utils import calculation_methods
 from scilifelab_epps.epp import upload_file
 
 DESC = """UDF-agnostic script to perform calculations across all artifacts of a step.
diff --git a/scripts/generate_minknow_samplesheet.py b/scripts/generate_minknow_samplesheet.py
index 25122426..88ce0410 100644
--- a/scripts/generate_minknow_samplesheet.py
+++ b/scripts/generate_minknow_samplesheet.py
@@ -17,7 +17,7 @@
 from tabulate import tabulate
 
 from data.ONT_barcodes import ONT_BARCODE_LABEL_PATTERN, ONT_BARCODES
-from epp_utils.udf_tools import fetch
+from scilifelab_epps.epp_utils.udf_tools import fetch
 from scilifelab_epps.epp import traceback_to_step, upload_file
 
 DESC = """ Script to generate MinKNOW samplesheet for starting ONT runs.
diff --git a/scripts/log_udfs.py b/scripts/log_udfs.py
index 5d86de13..24fb52f6 100644
--- a/scripts/log_udfs.py
+++ b/scripts/log_udfs.py
@@ -12,7 +12,7 @@
 from ont_send_reloading_info_to_db import parse_run
 from tabulate import tabulate
 
-from epp_utils import udf_tools
+from scilifelab_epps.epp_utils import udf_tools
 
 DESC = """Script for the EPP "Log fields" and file slot "Field log".
 
diff --git a/scripts/molar_concentration.py b/scripts/molar_concentration.py
index 40289d4a..1b0eee71 100644
--- a/scripts/molar_concentration.py
+++ b/scripts/molar_concentration.py
@@ -16,7 +16,7 @@
 from genologics.entities import Process
 from genologics.lims import Lims
 
-from epp_utils.formula import ng_ul_to_nM
+from scilifelab_epps.epp_utils.formula import ng_ul_to_nM
 from scilifelab_epps.epp import EppLogger
 
 
diff --git a/scripts/ont_calc_volumes.py b/scripts/ont_calc_volumes.py
index 5aafca99..b9528a9f 100644
--- a/scripts/ont_calc_volumes.py
+++ b/scripts/ont_calc_volumes.py
@@ -8,7 +8,7 @@
 from genologics.entities import Process
 from genologics.lims import Lims
 
-from epp_utils import formula, udf_tools
+from scilifelab_epps.epp_utils import formula, udf_tools
 
 DESC = """
 EPP "ONT calculate volumes"
diff --git a/scripts/ont_pool.py b/scripts/ont_pool.py
index fd067f4d..9a8b7304 100644
--- a/scripts/ont_pool.py
+++ b/scripts/ont_pool.py
@@ -11,7 +11,7 @@
 from tabulate import tabulate
 from zika_utils import fetch_sample_data
 
-from epp_utils import formula
+from scilifelab_epps.epp_utils import formula
 
 DESC = """
 EPP "ONT pooling", file slot "ONT pooling log".
diff --git a/scripts/ont_sync_to_db.py b/scripts/ont_sync_to_db.py
index 2dc13bd2..95c0e64c 100644
--- a/scripts/ont_sync_to_db.py
+++ b/scripts/ont_sync_to_db.py
@@ -18,7 +18,7 @@
 from genologics.lims import Lims
 from ont_send_reloading_info_to_db import get_ONT_db
 
-from epp_utils import udf_tools
+from scilifelab_epps.epp_utils import udf_tools
 from scilifelab_epps.epp import upload_file
 
 DESC = """Script for finishing the step to start ONT sequencing in LIMS.
diff --git a/scripts/ont_update_amount.py b/scripts/ont_update_amount.py
index 174e4f3c..b0b215ca 100644
--- a/scripts/ont_update_amount.py
+++ b/scripts/ont_update_amount.py
@@ -8,7 +8,7 @@
 from genologics.entities import Process
 from genologics.lims import Lims
 
-from epp_utils import formula, udf_tools
+from scilifelab_epps.epp_utils import formula, udf_tools
 
 DESC = """ EPP "ONT Update Amounts".
 
diff --git a/scripts/parse_anglerfish_results.py b/scripts/parse_anglerfish_results.py
index 07a7d936..c9856801 100644
--- a/scripts/parse_anglerfish_results.py
+++ b/scripts/parse_anglerfish_results.py
@@ -11,7 +11,7 @@
 from genologics.entities import Artifact, Process
 from genologics.lims import Lims
 
-from epp_utils import udf_tools
+from scilifelab_epps.epp_utils import udf_tools
 from scilifelab_epps.epp import upload_file
 
 TIMESTAMP: str = dt.now().strftime("%y%m%d_%H%M%S")
diff --git a/scripts/parse_ba_results.py b/scripts/parse_ba_results.py
index 9e7c7905..f19c9eac 100644
--- a/scripts/parse_ba_results.py
+++ b/scripts/parse_ba_results.py
@@ -11,7 +11,7 @@
 from genologics.entities import Process
 from genologics.lims import Lims
 
-from epp_utils import udf_tools
+from scilifelab_epps.epp_utils import udf_tools
 from scilifelab_epps.epp import get_well_number
 
 DESC = """This script parses the Agilent BioAnalyzer XML report.
diff --git a/scripts/qc_amount_calculation.py b/scripts/qc_amount_calculation.py
index 503318a9..ab119967 100644
--- a/scripts/qc_amount_calculation.py
+++ b/scripts/qc_amount_calculation.py
@@ -16,7 +16,7 @@
 from genologics.entities import Process
 from genologics.lims import Lims
 
-from epp_utils import formula, udf_tools
+from scilifelab_epps.epp_utils import formula, udf_tools
 from scilifelab_epps.epp import EppLogger
 
 
diff --git a/scripts/zika_methods.py b/scripts/zika_methods.py
index 8a90548e..27d294a5 100644
--- a/scripts/zika_methods.py
+++ b/scripts/zika_methods.py
@@ -11,7 +11,7 @@
 import numpy as np
 import pandas as pd
 
-from epp_utils.udf_tools import is_filled
+from scilifelab_epps.epp_utils.udf_tools import is_filled
 from scripts import zika_utils
 
 
diff --git a/scripts/zika_utils.py b/scripts/zika_utils.py
index 941e8da7..4fdb57d0 100644
--- a/scripts/zika_utils.py
+++ b/scripts/zika_utils.py
@@ -17,7 +17,7 @@
 import pandas as pd
 from genologics.entities import Process
 
-from epp_utils.udf_tools import fetch_last
+from scilifelab_epps.epp_utils.udf_tools import fetch_last
 
 
 def verify_step(currentStep, targets=None):

From 1fd77134ec1551f5ec7d7c24098eec716822df40 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Mon, 19 Aug 2024 10:55:17 +0200
Subject: [PATCH 04/50] add autobuild folder to gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 113985a6..23506842 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@ build/
 *.swp
 __pycache__
 node_modules
+dist

From 0b982c1656bd169ca88a4552ad8ef84e3fefa86d Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Mon, 19 Aug 2024 15:47:07 +0200
Subject: [PATCH 05/50] try constructing script wrapper

---
 scilifelab_epps/wrapper.py              | 101 ++++++++++++++++++
 scripts/generate_minknow_samplesheet.py | 133 +++++++-----------------
 2 files changed, 138 insertions(+), 96 deletions(-)
 create mode 100644 scilifelab_epps/wrapper.py

diff --git a/scilifelab_epps/wrapper.py b/scilifelab_epps/wrapper.py
new file mode 100644
index 00000000..2542d510
--- /dev/null
+++ b/scilifelab_epps/wrapper.py
@@ -0,0 +1,101 @@
+import logging
+import os
+import sys
+from datetime import datetime as dt
+
+from genologics.config import BASEURI, PASSWORD, USERNAME
+from genologics.entities import Process
+from genologics.lims import Lims
+
+from scilifelab_epps.epp import upload_file
+
+
+def epp_decorator(file: str):
+    """Decorator for passing file info."""
+    SCRIPT_NAME: str = os.path.basename(__file__).split(".")[0]
+
+    def _epp_decorator(script_main):
+        """Decorator for wrapping EPP scripts."""
+
+        def epp_wrapper(*args, **kwargs):
+            """General wrapper for EPP scripts."""
+
+            TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S")
+
+            # Set up LIMS
+            lims = Lims(BASEURI, USERNAME, PASSWORD)
+            lims.check_version()
+            process = Process(lims, id=args.pid)
+
+            # Name log file
+            log_filename: str = (
+                "_".join(
+                    [
+                        SCRIPT_NAME,
+                        process.id,
+                        TIMESTAMP,
+                        process.technician.name.replace(" ", ""),
+                    ]
+                )
+                + ".log"
+            )
+
+            # Set up logging
+            logging.basicConfig(
+                filename=log_filename,
+                filemode="w",
+                format="%(levelname)s: %(message)s",
+                level=logging.INFO,
+            )
+
+            # Start logging
+            logging.info(f"Script '{SCRIPT_NAME}' started at {TIMESTAMP}.")
+            logging.info(
+                f"Launched in step '{process.type.name}' ({process.id}) by {process.technician.name}."
+            )
+            args_str = "\n\t".join(
+                [f"'{arg}': {getattr(args, arg)}" for arg in vars(args)]
+            )
+            logging.info(f"Script called with arguments: \n\t{args_str}")
+
+            # Run
+            try:
+                script_main(process, lims, *args, **kwargs)
+
+            except Exception as e:
+                # Post error to LIMS GUI
+                logging.error(str(e), exc_info=True)
+                logging.shutdown()
+                upload_file(
+                    file_path=log_filename,
+                    file_slot=args.log,
+                    process=process,
+                    lims=lims,
+                )
+                os.remove(log_filename)
+                sys.stderr.write(str(e))
+                sys.exit(2)
+            else:
+                logging.info("")
+                logging.info("Script completed successfully.")
+                logging.shutdown()
+                upload_file(
+                    file_path=log_filename,
+                    file_slot=args.log,
+                    process=process,
+                    lims=lims,
+                )
+                # Check log for errors and warnings
+                log_content = open(log_filename).read()
+                os.remove(log_filename)
+                if "ERROR:" in log_content or "WARNING:" in log_content:
+                    sys.stderr.write(
+                        "Script finished successfully, but log contains errors or warnings, please have a look."
+                    )
+                    sys.exit(2)
+                else:
+                    sys.exit(0)
+
+        return epp_wrapper
+
+    return _epp_decorator
diff --git a/scripts/generate_minknow_samplesheet.py b/scripts/generate_minknow_samplesheet.py
index 88ce0410..f9289368 100644
--- a/scripts/generate_minknow_samplesheet.py
+++ b/scripts/generate_minknow_samplesheet.py
@@ -4,7 +4,6 @@
 import os
 import re
 import shutil
-import sys
 from argparse import ArgumentParser
 from datetime import datetime as dt
 
@@ -17,8 +16,9 @@
 from tabulate import tabulate
 
 from data.ONT_barcodes import ONT_BARCODE_LABEL_PATTERN, ONT_BARCODES
-from scilifelab_epps.epp_utils.udf_tools import fetch
 from scilifelab_epps.epp import traceback_to_step, upload_file
+from scilifelab_epps.epp_utils.udf_tools import fetch
+from scilifelab_epps.wrapper import epp_decorator
 
 DESC = """ Script to generate MinKNOW samplesheet for starting ONT runs.
 """
@@ -298,7 +298,7 @@ def write_minknow_csv(df: pd.DataFrame, file_path: str):
     df_csv.to_csv(file_path, index=False)
 
 
-def generate_MinKNOW_samplesheet(process: Process):
+def generate_MinKNOW_samplesheet(args):
     """=== Sample sheet columns ===
 
     flow_cell_id                E.g. 'PAM96489'
@@ -322,6 +322,10 @@ def generate_MinKNOW_samplesheet(process: Process):
     - barcode
 
     """
+
+    lims = Lims(BASEURI, USERNAME, PASSWORD)
+    process = Process(lims, id=args.pid)
+
     qc = True if "QC" in process.type.name else False
     logging.info(f"QC run: {qc}")
 
@@ -470,7 +474,35 @@ def generate_MinKNOW_samplesheet(process: Process):
     return file_name
 
 
-def main():
+@epp_decorator(__file__)
+def main(args):
+    lims = Lims(BASEURI, USERNAME, PASSWORD)
+    process = Process(lims, id=args.pid)
+
+    file_name = generate_MinKNOW_samplesheet(args)
+
+    logging.info("Uploading samplesheet to LIMS...")
+    upload_file(
+        file_name,
+        args.file,
+        process,
+        lims,
+    )
+
+    logging.info("Moving samplesheet to ngi-nas-ns...")
+    try:
+        shutil.copyfile(
+            file_name,
+            f"/srv/ngi-nas-ns/samplesheets/nanopore/{dt.now().year}/{file_name}",
+        )
+        os.remove(file_name)
+    except:
+        logging.error("Failed to move samplesheet to ngi-nas-ns.", exc_info=True)
+    else:
+        logging.info("Samplesheet moved to ngi-nas-ns.")
+
+
+if __name__ == "__main__":
     # Parse args
     parser = ArgumentParser(description=DESC)
     parser.add_argument(
@@ -493,95 +525,4 @@ def main():
     )
     args = parser.parse_args()
 
-    # Set up LIMS
-    lims = Lims(BASEURI, USERNAME, PASSWORD)
-    lims.check_version()
-    process = Process(lims, id=args.pid)
-
-    # Set up logging
-    log_filename: str = (
-        "_".join(
-            [
-                SCRIPT_NAME,
-                process.id,
-                TIMESTAMP,
-                process.technician.name.replace(" ", ""),
-            ]
-        )
-        + ".log"
-    )
-
-    logging.basicConfig(
-        filename=log_filename,
-        filemode="w",
-        format="%(levelname)s: %(message)s",
-        level=logging.INFO,
-    )
-
-    # Start logging
-    logging.info(f"Script '{SCRIPT_NAME}' started at {TIMESTAMP}.")
-    logging.info(
-        f"Launched in step '{process.type.name}' ({process.id}) by {process.technician.name}."
-    )
-    args_str = "\n\t".join([f"'{arg}': {getattr(args, arg)}" for arg in vars(args)])
-    logging.info(f"Script called with arguments: \n\t{args_str}")
-
-    try:
-        file_name = generate_MinKNOW_samplesheet(process=process)
-        logging.info("Uploading samplesheet to LIMS...")
-        upload_file(
-            file_name,
-            args.file,
-            process,
-            lims,
-        )
-
-        logging.info("Moving samplesheet to ngi-nas-ns...")
-        try:
-            shutil.copyfile(
-                file_name,
-                f"/srv/ngi-nas-ns/samplesheets/nanopore/{dt.now().year}/{file_name}",
-            )
-            os.remove(file_name)
-        except:
-            logging.error("Failed to move samplesheet to ngi-nas-ns.", exc_info=True)
-        else:
-            logging.info("Samplesheet moved to ngi-nas-ns.")
-
-    except Exception as e:
-        # Post error to LIMS GUI
-        logging.error(str(e), exc_info=True)
-        logging.shutdown()
-        upload_file(
-            file_path=log_filename,
-            file_slot=args.log,
-            process=process,
-            lims=lims,
-        )
-        os.remove(log_filename)
-        sys.stderr.write(str(e))
-        sys.exit(2)
-    else:
-        logging.info("")
-        logging.info("Script completed successfully.")
-        logging.shutdown()
-        upload_file(
-            file_path=log_filename,
-            file_slot=args.log,
-            process=process,
-            lims=lims,
-        )
-        # Check log for errors and warnings
-        log_content = open(log_filename).read()
-        os.remove(log_filename)
-        if "ERROR:" in log_content or "WARNING:" in log_content:
-            sys.stderr.write(
-                "Script finished successfully, but log contains errors or warnings, please have a look."
-            )
-            sys.exit(2)
-        else:
-            sys.exit(0)
-
-
-if __name__ == "__main__":
-    main()
+    main(args)

From 9f599f56ca24e73fd76fbd2be55d7bc8fa1db22e Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 11:50:27 +0200
Subject: [PATCH 06/50] exclude __init__.py in scripts/ from attempted entry
 point build

---
 setup.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 886deb80..9856d996 100644
--- a/setup.py
+++ b/setup.py
@@ -30,7 +30,9 @@
     url="https://github.com/scilifelab/scilifelab_epps",
     license="GPLv3",
     packages=find_packages(exclude=["ez_setup", "examples", "tests"]),
-    scripts=glob.glob("scripts/*.py"),
+    scripts=[
+        file for file in glob.glob("scripts/*.py") if file != "scripts/__init__.py"
+    ],
     include_package_data=True,
     zip_safe=False,
 )

From 2e694eb9c659462cd83cf039cf512691d9a6b71a Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 11:50:38 +0200
Subject: [PATCH 07/50] wip

---
 scilifelab_epps/wrapper.py              | 17 +++++++----------
 scripts/generate_minknow_samplesheet.py | 13 +++++++------
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/scilifelab_epps/wrapper.py b/scilifelab_epps/wrapper.py
index 2542d510..48025426 100644
--- a/scilifelab_epps/wrapper.py
+++ b/scilifelab_epps/wrapper.py
@@ -11,16 +11,13 @@
 
 
 def epp_decorator(file: str):
-    """Decorator for passing file info."""
-    SCRIPT_NAME: str = os.path.basename(__file__).split(".")[0]
+    script_name: str = os.path.basename(file).split(".")[0]
 
     def _epp_decorator(script_main):
-        """Decorator for wrapping EPP scripts."""
-
-        def epp_wrapper(*args, **kwargs):
+        def epp_wrapper(args):
             """General wrapper for EPP scripts."""
 
-            TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S")
+            timestamp = dt.now().strftime("%y%m%d_%H%M%S")
 
             # Set up LIMS
             lims = Lims(BASEURI, USERNAME, PASSWORD)
@@ -31,9 +28,9 @@ def epp_wrapper(*args, **kwargs):
             log_filename: str = (
                 "_".join(
                     [
-                        SCRIPT_NAME,
+                        script_name,
                         process.id,
-                        TIMESTAMP,
+                        timestamp,
                         process.technician.name.replace(" ", ""),
                     ]
                 )
@@ -49,7 +46,7 @@ def epp_wrapper(*args, **kwargs):
             )
 
             # Start logging
-            logging.info(f"Script '{SCRIPT_NAME}' started at {TIMESTAMP}.")
+            logging.info(f"Script '{script_name}' started at {timestamp}.")
             logging.info(
                 f"Launched in step '{process.type.name}' ({process.id}) by {process.technician.name}."
             )
@@ -60,7 +57,7 @@ def epp_wrapper(*args, **kwargs):
 
             # Run
             try:
-                script_main(process, lims, *args, **kwargs)
+                script_main(args)
 
             except Exception as e:
                 # Post error to LIMS GUI
diff --git a/scripts/generate_minknow_samplesheet.py b/scripts/generate_minknow_samplesheet.py
index f9289368..9176b731 100644
--- a/scripts/generate_minknow_samplesheet.py
+++ b/scripts/generate_minknow_samplesheet.py
@@ -23,12 +23,6 @@
 DESC = """ Script to generate MinKNOW samplesheet for starting ONT runs.
 """
 
-TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S")
-SCRIPT_NAME: str = os.path.basename(__file__).split(".")[0]
-
-with open("/opt/gls/clarity/users/glsai/config/genosqlrc.yaml") as f:
-    config = yaml.safe_load(f)
-
 
 def get_ont_library_contents(
     ont_library: Artifact,
@@ -194,6 +188,9 @@ def get_ont_library_contents(
 
 
 def get_pool_sample_label_mapping(pool: Artifact) -> dict[str, str]:
+    with open("/opt/gls/clarity/users/glsai/config/genosqlrc.yaml") as f:
+        config = yaml.safe_load(f)
+
     # Setup DB connection
     connection = psycopg2.connect(
         user=config["username"],
@@ -525,4 +522,8 @@ def main(args):
     )
     args = parser.parse_args()
 
+    import ipdb
+
+    ipdb.set_trace()
+
     main(args)

From 8d4b54b153072ff2353d9ccbc49619b01e70c9a5 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 11:56:43 +0200
Subject: [PATCH 08/50] cleanup

---
 scilifelab_epps/wrapper.py              | 4 +++-
 scripts/generate_minknow_samplesheet.py | 4 ----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/scilifelab_epps/wrapper.py b/scilifelab_epps/wrapper.py
index 48025426..7a3c339a 100644
--- a/scilifelab_epps/wrapper.py
+++ b/scilifelab_epps/wrapper.py
@@ -59,6 +59,7 @@ def epp_wrapper(args):
             try:
                 script_main(args)
 
+            # On script error
             except Exception as e:
                 # Post error to LIMS GUI
                 logging.error(str(e), exc_info=True)
@@ -72,8 +73,9 @@ def epp_wrapper(args):
                 os.remove(log_filename)
                 sys.stderr.write(str(e))
                 sys.exit(2)
+
+            # On script success
             else:
-                logging.info("")
                 logging.info("Script completed successfully.")
                 logging.shutdown()
                 upload_file(
diff --git a/scripts/generate_minknow_samplesheet.py b/scripts/generate_minknow_samplesheet.py
index 9176b731..4ce599cc 100644
--- a/scripts/generate_minknow_samplesheet.py
+++ b/scripts/generate_minknow_samplesheet.py
@@ -522,8 +522,4 @@ def main(args):
     )
     args = parser.parse_args()
 
-    import ipdb
-
-    ipdb.set_trace()
-
     main(args)

From f2441a5bfbd37cccc2b13e6f58245dfb5e4f1ada Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 12:16:26 +0200
Subject: [PATCH 09/50] ready for testing

---
 scilifelab_epps/wrapper.py              | 12 +++++++-----
 scripts/generate_minknow_samplesheet.py |  4 +++-
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/scilifelab_epps/wrapper.py b/scilifelab_epps/wrapper.py
index 7a3c339a..0d10ee4d 100644
--- a/scilifelab_epps/wrapper.py
+++ b/scilifelab_epps/wrapper.py
@@ -1,7 +1,6 @@
 import logging
 import os
 import sys
-from datetime import datetime as dt
 
 from genologics.config import BASEURI, PASSWORD, USERNAME
 from genologics.entities import Process
@@ -10,15 +9,18 @@
 from scilifelab_epps.epp import upload_file
 
 
-def epp_decorator(file: str):
-    script_name: str = os.path.basename(file).split(".")[0]
+def epp_decorator(script_path: str, timestamp: str):
+    """This top-level decorator is meant to be used on EPP scripts' main functions.
+
+    It receives the script path (__file__) and timestamp (yymmdd_hhmmss) as arguments to
+    pass on to it's children which wrap the main function to handle logging and graceful failure.
+    """
+    script_name: str = os.path.basename(script_path).split(".")[0]
 
     def _epp_decorator(script_main):
         def epp_wrapper(args):
             """General wrapper for EPP scripts."""
 
-            timestamp = dt.now().strftime("%y%m%d_%H%M%S")
-
             # Set up LIMS
             lims = Lims(BASEURI, USERNAME, PASSWORD)
             lims.check_version()
diff --git a/scripts/generate_minknow_samplesheet.py b/scripts/generate_minknow_samplesheet.py
index 4ce599cc..de2e0828 100644
--- a/scripts/generate_minknow_samplesheet.py
+++ b/scripts/generate_minknow_samplesheet.py
@@ -23,6 +23,8 @@
 DESC = """ Script to generate MinKNOW samplesheet for starting ONT runs.
 """
 
+TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S")
+
 
 def get_ont_library_contents(
     ont_library: Artifact,
@@ -471,7 +473,7 @@ def generate_MinKNOW_samplesheet(args):
     return file_name
 
 
-@epp_decorator(__file__)
+@epp_decorator(script_path=__file__, timestamp=TIMESTAMP)
 def main(args):
     lims = Lims(BASEURI, USERNAME, PASSWORD)
     process = Process(lims, id=args.pid)

From 42137a07ba986131e1f0ba3c7812937eca9fedfc Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 12:21:35 +0200
Subject: [PATCH 10/50] add header

---
 scilifelab_epps/wrapper.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scilifelab_epps/wrapper.py b/scilifelab_epps/wrapper.py
index 0d10ee4d..5380fbee 100644
--- a/scilifelab_epps/wrapper.py
+++ b/scilifelab_epps/wrapper.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 import logging
 import os
 import sys

From 71ad55709fd1f7a207769b69b8ea64ba5ced3d3b Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 12:34:13 +0200
Subject: [PATCH 11/50] renaming and reorganizing

---
 .../__init__.py                                  |  0
 .../calculation_methods.py                       |  4 ++--
 .../udf_arg_methods.py                           |  2 +-
 scilifelab_epps/{epp_utils => utils}/__init__.py |  0
 scilifelab_epps/{epp_utils => utils}/formula.py  |  0
 .../{epp_utils => utils}/udf_tools.py            |  0
 scilifelab_epps/zika/__init__.py                 |  0
 .../zika/methods.py                              |  2 +-
 .../zika/utils.py                                |  2 +-
 scripts/bravo_csv.py                             | 16 +++++++---------
 scripts/calc_from_args.py                        |  2 +-
 scripts/generate_minknow_samplesheet.py          |  2 +-
 scripts/log_udfs.py                              |  2 +-
 scripts/molar_concentration.py                   |  2 +-
 scripts/ont_calc_volumes.py                      |  2 +-
 scripts/ont_pool.py                              |  2 +-
 scripts/ont_sync_to_db.py                        |  2 +-
 scripts/ont_update_amount.py                     |  2 +-
 scripts/parse_anglerfish_results.py              |  2 +-
 scripts/parse_ba_results.py                      |  2 +-
 scripts/qc_amount_calculation.py                 |  2 +-
 21 files changed, 23 insertions(+), 25 deletions(-)
 rename scilifelab_epps/{calc_from_args_utils => calc_from_args}/__init__.py (100%)
 rename scilifelab_epps/{calc_from_args_utils => calc_from_args}/calculation_methods.py (99%)
 rename scilifelab_epps/{calc_from_args_utils => calc_from_args}/udf_arg_methods.py (98%)
 rename scilifelab_epps/{epp_utils => utils}/__init__.py (100%)
 rename scilifelab_epps/{epp_utils => utils}/formula.py (100%)
 rename scilifelab_epps/{epp_utils => utils}/udf_tools.py (100%)
 create mode 100644 scilifelab_epps/zika/__init__.py
 rename scripts/zika_methods.py => scilifelab_epps/zika/methods.py (99%)
 rename scripts/zika_utils.py => scilifelab_epps/zika/utils.py (99%)

diff --git a/scilifelab_epps/calc_from_args_utils/__init__.py b/scilifelab_epps/calc_from_args/__init__.py
similarity index 100%
rename from scilifelab_epps/calc_from_args_utils/__init__.py
rename to scilifelab_epps/calc_from_args/__init__.py
diff --git a/scilifelab_epps/calc_from_args_utils/calculation_methods.py b/scilifelab_epps/calc_from_args/calculation_methods.py
similarity index 99%
rename from scilifelab_epps/calc_from_args_utils/calculation_methods.py
rename to scilifelab_epps/calc_from_args/calculation_methods.py
index 7e7abfc3..f048a0ea 100644
--- a/scilifelab_epps/calc_from_args_utils/calculation_methods.py
+++ b/scilifelab_epps/calc_from_args/calculation_methods.py
@@ -7,12 +7,12 @@
 import tabulate
 from genologics.entities import Process
 
-from scilifelab_epps.calc_from_args_utils.udf_arg_methods import (
+from scilifelab_epps.calc_from_args.udf_arg_methods import (
     fetch_from_arg,
     get_UDF_source,
     get_UDF_source_name,
 )
-from scilifelab_epps.epp_utils import formula, udf_tools
+from scilifelab_epps.utils import formula, udf_tools
 
 DESC = """This file contains the method functions for a UDF-agnostic script."""
 
diff --git a/scilifelab_epps/calc_from_args_utils/udf_arg_methods.py b/scilifelab_epps/calc_from_args/udf_arg_methods.py
similarity index 98%
rename from scilifelab_epps/calc_from_args_utils/udf_arg_methods.py
rename to scilifelab_epps/calc_from_args/udf_arg_methods.py
index a321025f..1040c91d 100644
--- a/scilifelab_epps/calc_from_args_utils/udf_arg_methods.py
+++ b/scilifelab_epps/calc_from_args/udf_arg_methods.py
@@ -5,7 +5,7 @@
 import yaml
 from genologics.entities import Artifact, Process
 
-from scilifelab_epps.epp_utils import udf_tools
+from scilifelab_epps.utils import udf_tools
 
 
 def fetch_from_arg(
diff --git a/scilifelab_epps/epp_utils/__init__.py b/scilifelab_epps/utils/__init__.py
similarity index 100%
rename from scilifelab_epps/epp_utils/__init__.py
rename to scilifelab_epps/utils/__init__.py
diff --git a/scilifelab_epps/epp_utils/formula.py b/scilifelab_epps/utils/formula.py
similarity index 100%
rename from scilifelab_epps/epp_utils/formula.py
rename to scilifelab_epps/utils/formula.py
diff --git a/scilifelab_epps/epp_utils/udf_tools.py b/scilifelab_epps/utils/udf_tools.py
similarity index 100%
rename from scilifelab_epps/epp_utils/udf_tools.py
rename to scilifelab_epps/utils/udf_tools.py
diff --git a/scilifelab_epps/zika/__init__.py b/scilifelab_epps/zika/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/scripts/zika_methods.py b/scilifelab_epps/zika/methods.py
similarity index 99%
rename from scripts/zika_methods.py
rename to scilifelab_epps/zika/methods.py
index 27d294a5..77c24e23 100644
--- a/scripts/zika_methods.py
+++ b/scilifelab_epps/zika/methods.py
@@ -11,7 +11,7 @@
 import numpy as np
 import pandas as pd
 
-from scilifelab_epps.epp_utils.udf_tools import is_filled
+from scilifelab_epps.utils.udf_tools import is_filled
 from scripts import zika_utils
 
 
diff --git a/scripts/zika_utils.py b/scilifelab_epps/zika/utils.py
similarity index 99%
rename from scripts/zika_utils.py
rename to scilifelab_epps/zika/utils.py
index 4fdb57d0..26b0b528 100644
--- a/scripts/zika_utils.py
+++ b/scilifelab_epps/zika/utils.py
@@ -17,7 +17,7 @@
 import pandas as pd
 from genologics.entities import Process
 
-from scilifelab_epps.epp_utils.udf_tools import fetch_last
+from scilifelab_epps.utils.udf_tools import fetch_last
 
 
 def verify_step(currentStep, targets=None):
diff --git a/scripts/bravo_csv.py b/scripts/bravo_csv.py
index 95f3a038..29096fe3 100644
--- a/scripts/bravo_csv.py
+++ b/scripts/bravo_csv.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 
-
 import logging
 import os
 import re
@@ -8,12 +7,11 @@
 from argparse import ArgumentParser
 
 import pandas as pd
-import zika_methods
-import zika_utils
 from genologics.config import BASEURI, PASSWORD, USERNAME
 from genologics.entities import Process
 from genologics.lims import Lims
 
+from scilifelab_epps import zika
 from scilifelab_epps.epp import attach_file
 
 DESC = """EPP used to create csv files for the bravo robot"""
@@ -263,12 +261,12 @@ def prepooling(currentStep, lims):
 
     if currentStep.instrument.name == "Zika":
         if currentStep.type.name == "Illumina DNA No-QC Library Pooling":
-            zika_methods.pool_fixed_vol(
+            zika.methods.pool_fixed_vol(
                 currentStep=currentStep,
                 lims=lims,
             )
         else:
-            zika_methods.pool(
+            zika.methods.pool(
                 currentStep=currentStep,
                 lims=lims,
                 udfs={
@@ -369,14 +367,14 @@ def setup_qpcr(currentStep, lims):
 
 def default_bravo(lims, currentStep, with_total_vol=True):
     # Re-route to Zika
-    if zika_utils.verify_step(
+    if zika.utils.verify_step(
         currentStep,
         targets=[
             ("SMARTer Pico RNA", "Setup Workset/Plate"),
             ("QIAseq miRNA", "Setup Workset/Plate"),
         ],
     ):
-        zika_methods.norm(
+        zika.methods.norm(
             currentStep=currentStep,
             lims=lims,
             udfs={
@@ -388,10 +386,10 @@ def default_bravo(lims, currentStep, with_total_vol=True):
                 "final_conc": None,
             },
         )
-    elif zika_utils.verify_step(
+    elif zika.utils.verify_step(
         currentStep, targets=[("Amplicon", "Setup Workset/Plate")]
     ):
-        zika_methods.norm(
+        zika.methods.norm(
             currentStep=currentStep,
             lims=lims,
             # Use lower minimum pipetting volume and customer metrics
diff --git a/scripts/calc_from_args.py b/scripts/calc_from_args.py
index 1f6e48b1..d5f73932 100644
--- a/scripts/calc_from_args.py
+++ b/scripts/calc_from_args.py
@@ -9,7 +9,7 @@
 from genologics.entities import Process
 from genologics.lims import Lims
 
-from scilifelab_epps.calc_from_args_utils import calculation_methods
+from scilifelab_epps.calc_from_args import calculation_methods
 from scilifelab_epps.epp import upload_file
 
 DESC = """UDF-agnostic script to perform calculations across all artifacts of a step.
diff --git a/scripts/generate_minknow_samplesheet.py b/scripts/generate_minknow_samplesheet.py
index de2e0828..1a957632 100644
--- a/scripts/generate_minknow_samplesheet.py
+++ b/scripts/generate_minknow_samplesheet.py
@@ -17,7 +17,7 @@
 
 from data.ONT_barcodes import ONT_BARCODE_LABEL_PATTERN, ONT_BARCODES
 from scilifelab_epps.epp import traceback_to_step, upload_file
-from scilifelab_epps.epp_utils.udf_tools import fetch
+from scilifelab_epps.utils.udf_tools import fetch
 from scilifelab_epps.wrapper import epp_decorator
 
 DESC = """ Script to generate MinKNOW samplesheet for starting ONT runs.
diff --git a/scripts/log_udfs.py b/scripts/log_udfs.py
index 24fb52f6..e42d6af0 100644
--- a/scripts/log_udfs.py
+++ b/scripts/log_udfs.py
@@ -12,7 +12,7 @@
 from ont_send_reloading_info_to_db import parse_run
 from tabulate import tabulate
 
-from scilifelab_epps.epp_utils import udf_tools
+from scilifelab_epps.utils import udf_tools
 
 DESC = """Script for the EPP "Log fields" and file slot "Field log".
 
diff --git a/scripts/molar_concentration.py b/scripts/molar_concentration.py
index 1b0eee71..e7603da0 100644
--- a/scripts/molar_concentration.py
+++ b/scripts/molar_concentration.py
@@ -16,7 +16,7 @@
 from genologics.entities import Process
 from genologics.lims import Lims
 
-from scilifelab_epps.epp_utils.formula import ng_ul_to_nM
+from scilifelab_epps.utils.formula import ng_ul_to_nM
 from scilifelab_epps.epp import EppLogger
 
 
diff --git a/scripts/ont_calc_volumes.py b/scripts/ont_calc_volumes.py
index b9528a9f..9c7520ae 100644
--- a/scripts/ont_calc_volumes.py
+++ b/scripts/ont_calc_volumes.py
@@ -8,7 +8,7 @@
 from genologics.entities import Process
 from genologics.lims import Lims
 
-from scilifelab_epps.epp_utils import formula, udf_tools
+from scilifelab_epps.utils import formula, udf_tools
 
 DESC = """
 EPP "ONT calculate volumes"
diff --git a/scripts/ont_pool.py b/scripts/ont_pool.py
index 9a8b7304..076dc8e4 100644
--- a/scripts/ont_pool.py
+++ b/scripts/ont_pool.py
@@ -11,7 +11,7 @@
 from tabulate import tabulate
 from zika_utils import fetch_sample_data
 
-from scilifelab_epps.epp_utils import formula
+from scilifelab_epps.utils import formula
 
 DESC = """
 EPP "ONT pooling", file slot "ONT pooling log".
diff --git a/scripts/ont_sync_to_db.py b/scripts/ont_sync_to_db.py
index 95c0e64c..29367148 100644
--- a/scripts/ont_sync_to_db.py
+++ b/scripts/ont_sync_to_db.py
@@ -18,7 +18,7 @@
 from genologics.lims import Lims
 from ont_send_reloading_info_to_db import get_ONT_db
 
-from scilifelab_epps.epp_utils import udf_tools
+from scilifelab_epps.utils import udf_tools
 from scilifelab_epps.epp import upload_file
 
 DESC = """Script for finishing the step to start ONT sequencing in LIMS.
diff --git a/scripts/ont_update_amount.py b/scripts/ont_update_amount.py
index b0b215ca..f3c6fbfc 100644
--- a/scripts/ont_update_amount.py
+++ b/scripts/ont_update_amount.py
@@ -8,7 +8,7 @@
 from genologics.entities import Process
 from genologics.lims import Lims
 
-from scilifelab_epps.epp_utils import formula, udf_tools
+from scilifelab_epps.utils import formula, udf_tools
 
 DESC = """ EPP "ONT Update Amounts".
 
diff --git a/scripts/parse_anglerfish_results.py b/scripts/parse_anglerfish_results.py
index c9856801..6fec39b5 100644
--- a/scripts/parse_anglerfish_results.py
+++ b/scripts/parse_anglerfish_results.py
@@ -11,7 +11,7 @@
 from genologics.entities import Artifact, Process
 from genologics.lims import Lims
 
-from scilifelab_epps.epp_utils import udf_tools
+from scilifelab_epps.utils import udf_tools
 from scilifelab_epps.epp import upload_file
 
 TIMESTAMP: str = dt.now().strftime("%y%m%d_%H%M%S")
diff --git a/scripts/parse_ba_results.py b/scripts/parse_ba_results.py
index f19c9eac..78f96465 100644
--- a/scripts/parse_ba_results.py
+++ b/scripts/parse_ba_results.py
@@ -11,7 +11,7 @@
 from genologics.entities import Process
 from genologics.lims import Lims
 
-from scilifelab_epps.epp_utils import udf_tools
+from scilifelab_epps.utils import udf_tools
 from scilifelab_epps.epp import get_well_number
 
 DESC = """This script parses the Agilent BioAnalyzer XML report.
diff --git a/scripts/qc_amount_calculation.py b/scripts/qc_amount_calculation.py
index ab119967..ab39568c 100644
--- a/scripts/qc_amount_calculation.py
+++ b/scripts/qc_amount_calculation.py
@@ -16,7 +16,7 @@
 from genologics.entities import Process
 from genologics.lims import Lims
 
-from scilifelab_epps.epp_utils import formula, udf_tools
+from scilifelab_epps.utils import formula, udf_tools
 from scilifelab_epps.epp import EppLogger
 
 

From 400488b660d52ee41bc3dc921629dfcc69f240cc Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 12:34:23 +0200
Subject: [PATCH 12/50] implement wrapper

---
 scripts/generate_anglerfish_samplesheet.py | 125 +++++----------------
 1 file changed, 30 insertions(+), 95 deletions(-)

diff --git a/scripts/generate_anglerfish_samplesheet.py b/scripts/generate_anglerfish_samplesheet.py
index e7a6f9f7..3b4cc925 100644
--- a/scripts/generate_anglerfish_samplesheet.py
+++ b/scripts/generate_anglerfish_samplesheet.py
@@ -4,7 +4,6 @@
 import os
 import re
 import shutil
-import sys
 from argparse import ArgumentParser
 from datetime import datetime as dt
 
@@ -16,12 +15,12 @@
 from data.Chromium_10X_indexes import Chromium_10X_indexes
 from data.ONT_barcodes import ONT_BARCODES
 from scilifelab_epps.epp import upload_file
+from scilifelab_epps.wrapper import epp_decorator
 
 DESC = """Script to generate Anglerfish samplesheet for ONT runs.
 """
 
 TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S")
-SCRIPT_NAME: str = os.path.basename(__file__).split(".")[0]
 
 
 def generate_anglerfish_samplesheet(process):
@@ -124,7 +123,35 @@ def get_adaptor_name(reagent_label: str) -> str | list[str]:
         )
 
 
-def main():
+@epp_decorator(script_path=__file__, timestamp=TIMESTAMP)
+def main(args):
+    lims = Lims(BASEURI, USERNAME, PASSWORD)
+    process = Process(lims, id=args.pid)
+
+    file_name = generate_anglerfish_samplesheet(process)
+
+    logging.info("Uploading samplesheet to LIMS...")
+    upload_file(
+        file_name,
+        args.file,
+        process,
+        lims,
+    )
+
+    logging.info("Moving samplesheet to ngi-nas-ns...")
+    try:
+        shutil.copyfile(
+            file_name,
+            f"/srv/ngi-nas-ns/samplesheets/anglerfish/{dt.now().year}/{file_name}",
+        )
+        os.remove(file_name)
+    except:
+        logging.error("Failed to move samplesheet to ngi-nas-ns.")
+    else:
+        logging.info("Samplesheet moved to ngi-nas-ns.")
+
+
+if __name__ == "__main__":
     # Parse args
     parser = ArgumentParser(description=DESC)
     parser.add_argument(
@@ -147,96 +174,4 @@ def main():
     )
     args = parser.parse_args()
 
-    # Set up LIMS
-    lims = Lims(BASEURI, USERNAME, PASSWORD)
-    lims.check_version()
-    process = Process(lims, id=args.pid)
-
-    # Set up logging
-    log_filename: str = (
-        "_".join(
-            [
-                SCRIPT_NAME,
-                process.id,
-                TIMESTAMP,
-                process.technician.name.replace(" ", ""),
-            ]
-        )
-        + ".log"
-    )
-
-    logging.basicConfig(
-        filename=log_filename,
-        filemode="w",
-        format="%(levelname)s: %(message)s",
-        level=logging.INFO,
-    )
-
-    # Start logging
-    logging.info(f"Script '{SCRIPT_NAME}' started at {TIMESTAMP}.")
-    logging.info(
-        f"Launched in step '{process.type.name}' ({process.id}) by {process.technician.name}."
-    )
-    args_str = "\n\t".join([f"'{arg}': {getattr(args, arg)}" for arg in vars(args)])
-    logging.info(f"Script called with arguments: \n\t{args_str}")
-
-    try:
-        file_name = generate_anglerfish_samplesheet(process)
-
-        logging.info("Uploading samplesheet to LIMS...")
-        upload_file(
-            file_name,
-            args.file,
-            process,
-            lims,
-        )
-
-        logging.info("Moving samplesheet to ngi-nas-ns...")
-        try:
-            shutil.copyfile(
-                file_name,
-                f"/srv/ngi-nas-ns/samplesheets/anglerfish/{dt.now().year}/{file_name}",
-            )
-            os.remove(file_name)
-        except:
-            logging.error("Failed to move samplesheet to ngi-nas-ns.")
-        else:
-            logging.info("Samplesheet moved to ngi-nas-ns.")
-
-    except Exception as e:
-        # Post error to LIMS GUI
-        logging.error(str(e), exc_info=True)
-        logging.shutdown()
-        upload_file(
-            file_path=log_filename,
-            file_slot=args.log,
-            process=process,
-            lims=lims,
-        )
-        os.remove(log_filename)
-        sys.stderr.write(str(e))
-        sys.exit(2)
-    else:
-        logging.info("")
-        logging.info("Script completed successfully.")
-        logging.shutdown()
-        upload_file(
-            file_path=log_filename,
-            file_slot=args.log,
-            process=process,
-            lims=lims,
-        )
-        # Check log for errors and warnings
-        log_content = open(log_filename).read()
-        os.remove(log_filename)
-        if "ERROR:" in log_content or "WARNING:" in log_content:
-            sys.stderr.write(
-                "Script finished successfully, but log contains errors or warnings, please have a look."
-            )
-            sys.exit(2)
-        else:
-            sys.exit(0)
-
-
-if __name__ == "__main__":
     main()

From c1867fd59bd72dfa153f80e171a498cbd783fb97 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 14:16:35 +0200
Subject: [PATCH 13/50] wrap script

---
 scripts/ont_send_reloading_info_to_db.py | 76 ++++--------------------
 1 file changed, 11 insertions(+), 65 deletions(-)

diff --git a/scripts/ont_send_reloading_info_to_db.py b/scripts/ont_send_reloading_info_to_db.py
index 2da91b0c..de8fc6d8 100644
--- a/scripts/ont_send_reloading_info_to_db.py
+++ b/scripts/ont_send_reloading_info_to_db.py
@@ -3,7 +3,6 @@
 import logging
 import os
 import re
-import sys
 from argparse import ArgumentParser
 from datetime import datetime as dt
 
@@ -14,7 +13,7 @@
 from genologics.entities import Artifact, Process
 from genologics.lims import Lims
 
-from scilifelab_epps.epp import upload_file
+from scilifelab_epps.wrapper import epp_decorator
 
 DESC = """Used to record the washing and reloading of ONT flow cells.
 
@@ -22,7 +21,6 @@
 """
 
 TIMESTAMP: str = dt.now().strftime("%y%m%d_%H%M%S")
-SCRIPT_NAME: str = os.path.basename(__file__).split(".")[0]
 
 
 def send_reloading_info_to_db(process: Process):
@@ -178,71 +176,19 @@ def check_csv_udf_list(pattern: str, csv_udf_list: list[str]) -> bool:
         return True
 
 
-def main():
-    # Parse args
-    parser = ArgumentParser(description=DESC)
-    parser.add_argument("--pid", help="Lims id for current Process")
-    parser.add_argument("--log", type=str, help="Which log file slot to use")
-    args = parser.parse_args()
-
-    # Set up LIMS
+@epp_decorator(script_path=__file__, timestamp=TIMESTAMP)
+def main(args):
     lims = Lims(BASEURI, USERNAME, PASSWORD)
-    lims.check_version()
     process = Process(lims, id=args.pid)
 
-    # Set up logging
-    log_filename: str = (
-        "_".join(
-            [
-                SCRIPT_NAME,
-                process.id,
-                TIMESTAMP,
-                process.technician.name.replace(" ", ""),
-            ]
-        )
-        + ".log"
-    )
-
-    logging.basicConfig(
-        filename=log_filename,
-        filemode="w",
-        format="%(levelname)s: %(message)s",
-        level=logging.INFO,
-    )
-
-    # Start logging
-    logging.info(f"Script '{SCRIPT_NAME}' started at {TIMESTAMP}.")
-    logging.info(
-        f"Launched in step '{process.type.name}' ({process.id}) by {process.technician.name}."
-    )
-    args_str = "\n\t".join([f"'{arg}': {getattr(args, arg)}" for arg in vars(args)])
-    logging.info(f"Script called with arguments: \n\t{args_str}")
-
-    try:
-        send_reloading_info_to_db(process)
-    except Exception as e:
-        # Post error to LIMS GUI
-        logging.error(e)
-        logging.shutdown()
-        upload_file(
-            file_path=log_filename,
-            file_slot=args.log,
-            process=process,
-            lims=lims,
-        )
-        sys.stderr.write(str(e))
-        sys.exit(2)
-    else:
-        logging.info("Script completed successfully.")
-        logging.shutdown()
-        upload_file(
-            file_path=log_filename,
-            file_slot=args.log,
-            process=process,
-            lims=lims,
-        )
-        sys.exit(0)
+    send_reloading_info_to_db(process)
 
 
 if __name__ == "__main__":
-    main()
+    # Parse args
+    parser = ArgumentParser(description=DESC)
+    parser.add_argument("--pid", help="Lims id for current Process")
+    parser.add_argument("--log", type=str, help="Which log file slot to use")
+    args = parser.parse_args()
+
+    main(args)

From 199193c5129f8b6c4ae0bcdf855453927918d909 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 14:18:36 +0200
Subject: [PATCH 14/50] wrap script

---
 scripts/ont_sync_to_db.py | 78 +++++++--------------------------------
 1 file changed, 13 insertions(+), 65 deletions(-)

diff --git a/scripts/ont_sync_to_db.py b/scripts/ont_sync_to_db.py
index 29367148..9b787c83 100644
--- a/scripts/ont_sync_to_db.py
+++ b/scripts/ont_sync_to_db.py
@@ -3,7 +3,6 @@
 import logging
 import os
 import re
-import sys
 from argparse import ArgumentParser, Namespace
 from datetime import datetime as dt
 
@@ -19,7 +18,7 @@
 from ont_send_reloading_info_to_db import get_ONT_db
 
 from scilifelab_epps.utils import udf_tools
-from scilifelab_epps.epp import upload_file
+from scilifelab_epps.wrapper import epp_decorator
 
 DESC = """Script for finishing the step to start ONT sequencing in LIMS.
 
@@ -28,7 +27,6 @@
 """
 
 TIMESTAMP: str = dt.now().strftime("%y%m%d_%H%M%S")
-SCRIPT_NAME: str = os.path.basename(__file__).split(".")[0]
 
 
 def assert_samplesheet(process: Process, args: Namespace, lims: Lims):
@@ -241,7 +239,17 @@ def sync_runs_to_db(process: Process, args: Namespace, lims: Lims):
         )
 
 
-def main():
+@epp_decorator(script_path=__file__, timestamp=TIMESTAMP)
+def main(args):
+    # Set up LIMS
+    lims = Lims(BASEURI, USERNAME, PASSWORD)
+    lims.check_version()
+    process = Process(lims, id=args.pid)
+
+    sync_runs_to_db(process=process, lims=lims, args=args)
+
+
+if __name__ == "__main__":
     # Parse args
     parser = ArgumentParser(description=DESC)
     parser.add_argument(
@@ -263,64 +271,4 @@ def main():
     )
     args: Namespace = parser.parse_args()
 
-    # Set up LIMS
-    lims = Lims(BASEURI, USERNAME, PASSWORD)
-    lims.check_version()
-    process = Process(lims, id=args.pid)
-
-    # Set up logging
-    log_filename: str = (
-        "_".join(
-            [
-                SCRIPT_NAME,
-                process.id,
-                TIMESTAMP,
-                process.technician.name.replace(" ", ""),
-            ]
-        )
-        + ".log"
-    )
-
-    logging.basicConfig(
-        filename=log_filename,
-        filemode="w",
-        format="%(filename)s - %(funcName)s - %(levelname)s - %(message)s",
-        level=logging.INFO,
-    )
-
-    # Start logging
-    logging.info(f"Script '{SCRIPT_NAME}' started at {TIMESTAMP}.")
-    logging.info(
-        f"Launched in step '{process.type.name}' ({process.id}) by {process.technician.name}."
-    )
-    args_str = "\n\t".join([f"'{arg}': {getattr(args, arg)}" for arg in vars(args)])
-    logging.info(f"Script called with arguments: \n\t{args_str}")
-
-    try:
-        sync_runs_to_db(process=process, lims=lims, args=args)
-    except Exception as e:
-        # Post error to LIMS GUI
-        logging.error(e, exc_info=True)
-        logging.shutdown()
-        upload_file(
-            file_path=log_filename,
-            file_slot=args.log,
-            process=process,
-            lims=lims,
-        )
-        sys.stderr.write(str(e))
-        sys.exit(2)
-    else:
-        logging.info("Script completed successfully.")
-        logging.shutdown()
-        upload_file(
-            file_path=log_filename,
-            file_slot=args.log,
-            process=process,
-            lims=lims,
-        )
-        sys.exit(0)
-
-
-if __name__ == "__main__":
-    main()
+    main(args)

From 665a3cf1a382a33c318d933bc15a99da1884ab15 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 14:21:00 +0200
Subject: [PATCH 15/50] wrap script

---
 scripts/parse_anglerfish_results.py | 77 +++++------------------------
 1 file changed, 12 insertions(+), 65 deletions(-)

diff --git a/scripts/parse_anglerfish_results.py b/scripts/parse_anglerfish_results.py
index 6fec39b5..13797087 100644
--- a/scripts/parse_anglerfish_results.py
+++ b/scripts/parse_anglerfish_results.py
@@ -2,7 +2,6 @@
 import glob
 import logging
 import os
-import sys
 from argparse import ArgumentParser
 from datetime import datetime as dt
 
@@ -12,10 +11,9 @@
 from genologics.lims import Lims
 
 from scilifelab_epps.utils import udf_tools
-from scilifelab_epps.epp import upload_file
+from scilifelab_epps.wrapper import epp_decorator
 
 TIMESTAMP: str = dt.now().strftime("%y%m%d_%H%M%S")
-SCRIPT_NAME: str = os.path.basename(__file__).split(".")[0]
 
 
 def find_run(process: Process) -> str:
@@ -197,7 +195,16 @@ def parse_anglerfish_results(process, lims):
     fill_udfs(process, df_parsed)
 
 
-def main():
+@epp_decorator(script_path=__file__, timestamp=TIMESTAMP)
+def main(args):
+    # Set up LIMS
+    lims = Lims(BASEURI, USERNAME, PASSWORD)
+    process = Process(lims, id=args.pid)
+
+    parse_anglerfish_results(process, lims)
+
+
+if __name__ == "__main__":
     # Parse args
     parser = ArgumentParser()
     parser.add_argument(
@@ -217,64 +224,4 @@ def main():
     )
     args = parser.parse_args()
 
-    # Set up LIMS
-    lims = Lims(BASEURI, USERNAME, PASSWORD)
-    lims.check_version()
-    process = Process(lims, id=args.pid)
-
-    # Set up logging
-    log_filename = (
-        "_".join(
-            [
-                SCRIPT_NAME,
-                process.id,
-                TIMESTAMP,
-                process.technician.name.replace(" ", ""),
-            ]
-        )
-        + ".log"
-    )
-
-    logging.basicConfig(
-        filename=log_filename,
-        filemode="w",
-        format="%(levelname)s: %(message)s",
-        level=logging.INFO,
-    )
-
-    # Start logging
-    logging.info(f"Script '{SCRIPT_NAME}' started at {TIMESTAMP}.")
-    logging.info(
-        f"Launched in step '{process.type.name}' ({process.id}) by {process.technician.name}."
-    )
-    args_str = "\n\t".join([f"'{arg}': {getattr(args, arg)}" for arg in vars(args)])
-    logging.info(f"Script called with arguments: \n\t{args_str}")
-
-    try:
-        parse_anglerfish_results(process, lims)
-    except Exception as e:
-        # Post error to LIMS GUI
-        logging.error(e, exc_info=True)
-        logging.shutdown()
-        upload_file(
-            file_path=log_filename,
-            file_slot=args.log,
-            process=process,
-            lims=lims,
-        )
-        sys.stderr.write(str(e))
-        sys.exit(2)
-    else:
-        logging.info("Script completed successfully.")
-        logging.shutdown()
-        upload_file(
-            file_path=log_filename,
-            file_slot=args.log,
-            process=process,
-            lims=lims,
-        )
-        sys.exit(0)
-
-
-if __name__ == "__main__":
-    main()
+    main(args)

From bf7db1fc2bfa126f1600bed2e501afe4e4b54f89 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 14:31:18 +0200
Subject: [PATCH 16/50] wrap script

---
 scripts/calc_from_args.py | 95 +++++++--------------------------------
 1 file changed, 15 insertions(+), 80 deletions(-)

diff --git a/scripts/calc_from_args.py b/scripts/calc_from_args.py
index d5f73932..82332b9c 100644
--- a/scripts/calc_from_args.py
+++ b/scripts/calc_from_args.py
@@ -1,7 +1,4 @@
 #!/usr/bin/env python
-import logging
-import os
-import sys
 from argparse import ArgumentParser
 from datetime import datetime as dt
 
@@ -10,7 +7,7 @@
 from genologics.lims import Lims
 
 from scilifelab_epps.calc_from_args import calculation_methods
-from scilifelab_epps.epp import upload_file
+from scilifelab_epps.wrapper import epp_decorator
 
 DESC = """UDF-agnostic script to perform calculations across all artifacts of a step.
 
@@ -20,7 +17,6 @@
 """
 
 TIMESTAMP: str = dt.now().strftime("%y%m%d_%H%M%S")
-SCRIPT_NAME: str = os.path.basename(__file__).split(".")[0]
 
 
 def parse_udf_arg(arg_string: str) -> dict:
@@ -71,7 +67,8 @@ def parse_udf_arg(arg_string: str) -> dict:
     return arg_dict
 
 
-def main():
+@epp_decorator(script_path=__file__, timestamp=TIMESTAMP)
+def main(args):
     f"""Set up log, LIMS instance and parse args.
 
     Example 1:
@@ -124,6 +121,15 @@ def main():
 
     """
 
+    # Set up LIMS
+    lims = Lims(BASEURI, USERNAME, PASSWORD)
+    process = Process(lims, id=args.pid)
+
+    function_to_use = getattr(calculation_methods, args.calc)
+    function_to_use(process, args)
+
+
+if __name__ == "__main__":
     # Parse args
     parser = ArgumentParser(description=DESC)
     parser.add_argument("--pid", type=str, help="Lims ID for current Process")
@@ -134,6 +140,7 @@ def main():
         help="Which function to use for calculations",
     )
     parser.add_argument("--log", type=str, help="Which log file slot to use")
+
     # UDFs to use for calculations
     udf_args = [
         "vol_in",
@@ -146,79 +153,7 @@ def main():
     ]
     for udf_arg in udf_args:
         parser.add_argument(f"--{udf_arg}", type=parse_udf_arg)
-    args = parser.parse_args()
-
-    # Set up LIMS
-    lims = Lims(BASEURI, USERNAME, PASSWORD)
-    lims.check_version()
-    process = Process(lims, id=args.pid)
-
-    # Set up logging
-    log_filename: str = (
-        "_".join(
-            [
-                SCRIPT_NAME,
-                args.calc,
-                process.id,
-                TIMESTAMP,
-                process.technician.name.replace(" ", ""),
-            ]
-        )
-        + ".log"
-    )
-
-    logging.basicConfig(
-        filename=log_filename,
-        filemode="w",
-        format="%(levelname)s: %(message)s",
-        level=logging.INFO,
-    )
-
-    # Start logging
-    logging.info(f"Script '{SCRIPT_NAME}' started at {TIMESTAMP}.")
-    logging.info(
-        f"Launched in step '{process.type.name}' ({process.id}) by {process.technician.name}."
-    )
-    args_str = "\n\t".join([f"'{arg}': {getattr(args, arg)}" for arg in vars(args)])
-    logging.info(f"Script called with arguments: \n\t{args_str}")
-
-    try:
-        function_to_use = getattr(calculation_methods, args.calc)
-        function_to_use(process, args)
-    except Exception as e:
-        # Post error to LIMS GUI
-        logging.error(str(e), exc_info=True)
-        logging.shutdown()
-        upload_file(
-            file_path=log_filename,
-            file_slot=args.log,
-            process=process,
-            lims=lims,
-            remove=True,
-        )
-        sys.stderr.write(str(e))
-        sys.exit(2)
-    else:
-        logging.info("")
-        logging.info("Script completed successfully.")
-        logging.shutdown()
-        log_content = open(log_filename).read()
-        upload_file(
-            file_path=log_filename,
-            file_slot=args.log,
-            process=process,
-            lims=lims,
-            remove=True,
-        )
-        # Check log for errors and warnings
-        if "ERROR:" in log_content or "WARNING:" in log_content:
-            sys.stderr.write(
-                "Script finished successfully, but log contains errors or warnings, please have a look."
-            )
-            sys.exit(2)
-        else:
-            sys.exit(0)
 
+    args = parser.parse_args()
 
-if __name__ == "__main__":
-    main()
+    main(args)

From 30feb204e292e17e1b2b1d4a23ee1681090c8887 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 14:36:54 +0200
Subject: [PATCH 17/50] fix zika references

---
 scilifelab_epps/zika/methods.py | 54 ++++++++++++++++-----------------
 scripts/ont_pool.py             |  2 +-
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/scilifelab_epps/zika/methods.py b/scilifelab_epps/zika/methods.py
index 77c24e23..7c596454 100644
--- a/scilifelab_epps/zika/methods.py
+++ b/scilifelab_epps/zika/methods.py
@@ -11,8 +11,8 @@
 import numpy as np
 import pandas as pd
 
+from scilifelab_epps import zika
 from scilifelab_epps.utils.udf_tools import is_filled
-from scripts import zika_utils
 
 
 def pool_fixed_vol(
@@ -57,7 +57,7 @@ def pool_fixed_vol(
         "dst_id": "art_tuple[1]['uri'].location[0].id",
         "dst_well": "art_tuple[1]['uri'].location[1]",
     }
-    df_all = zika_utils.fetch_sample_data(currentStep, to_fetch)
+    df_all = zika.utils.fetch_sample_data(currentStep, to_fetch)
 
     # Define deck, a dictionary mapping plate names to deck positions
     assert len(df_all.src_id.unique()) <= 4, "Only one to four input plates allowed"
@@ -77,22 +77,22 @@ def pool_fixed_vol(
         df_wl = pd.concat([df_wl, df_pool], axis=0)
 
     # Format worklist
-    df_formatted = zika_utils.format_worklist(df_wl.copy(), deck)
-    wl_filename, log_filename = zika_utils.get_filenames(
+    df_formatted = zika.utils.format_worklist(df_wl.copy(), deck)
+    wl_filename, log_filename = zika.utils.get_filenames(
         method_name="pool", pid=currentStep.id
     )
 
     # Write the output files
-    zika_utils.write_worklist(
+    zika.utils.write_worklist(
         df=df_formatted.copy(),
         deck=deck,
         wl_filename=wl_filename,
     )
-    zika_utils.write_log(log, log_filename)
+    zika.utils.write_log(log, log_filename)
 
     # Upload files
-    zika_utils.upload_csv(currentStep, lims, wl_filename)
-    zika_utils.upload_log(currentStep, lims, log_filename)
+    zika.utils.upload_csv(currentStep, lims, wl_filename)
+    zika.utils.upload_log(currentStep, lims, log_filename)
 
     # Issue warnings, if any
     if any("WARNING" in entry for entry in log):
@@ -187,7 +187,7 @@ def pool(
             if v:
                 to_fetch[k] = f"art_tuple[1]['uri'].udf['{v}']"
 
-        df_all = zika_utils.fetch_sample_data(currentStep, to_fetch)
+        df_all = zika.utils.fetch_sample_data(currentStep, to_fetch)
 
         # All samples should have accessible volume
         assert all(
@@ -321,7 +321,7 @@ def pool(
                         )
 
                         errors = True
-                        raise zika_utils.VolumeOverflow
+                        raise zika.utils.VolumeOverflow
 
                     log.append(
                         "\nAn even pool can be created within the following parameter ranges:"
@@ -409,7 +409,7 @@ def pool(
                         )
 
                         errors = True
-                        raise zika_utils.VolumeOverflow
+                        raise zika.utils.VolumeOverflow
 
                     log.append(
                         "\nWill try to create a pool that is as even as possible. Accounting for sample depletion, a pool can be created with the following parameter ranges: "
@@ -436,7 +436,7 @@ def pool(
                     # No volume expansion is allowed, so pool volume is set to the minimum, given the conc
                     pool_vol = pool_real_min_sample_vol
 
-            except zika_utils.VolumeOverflow:
+            except zika.utils.VolumeOverflow:
                 continue
 
             # === STORE FINAL CALCULATION RESULTS ===
@@ -518,14 +518,14 @@ def pool(
             pool.put()
 
         # Get filenames and upload log if errors
-        wl_filename, log_filename = zika_utils.get_filenames(
+        wl_filename, log_filename = zika.utils.get_filenames(
             method_name="pool", pid=currentStep.id
         )
         if errors:
-            raise zika_utils.CheckLog(log, log_filename, lims, currentStep)
+            raise zika.utils.CheckLog(log, log_filename, lims, currentStep)
 
         # Format worklist
-        df_formatted = zika_utils.format_worklist(df_wl.copy(), deck)
+        df_formatted = zika.utils.format_worklist(df_wl.copy(), deck)
 
         # Comments to attach to the worklist header
         comments = [
@@ -539,17 +539,17 @@ def pool(
                 )
 
         # Write the output files
-        zika_utils.write_worklist(
+        zika.utils.write_worklist(
             df=df_formatted.copy(),
             deck=deck,
             wl_filename=wl_filename,
             comments=comments,
         )
-        zika_utils.write_log(log, log_filename)
+        zika.utils.write_log(log, log_filename)
 
         # Upload files
-        zika_utils.upload_csv(currentStep, lims, wl_filename)
-        zika_utils.upload_log(currentStep, lims, log_filename)
+        zika.utils.upload_csv(currentStep, lims, wl_filename)
+        zika.utils.upload_log(currentStep, lims, log_filename)
 
         # Issue warnings, if any
         if any("WARNING" in entry for entry in log):
@@ -656,7 +656,7 @@ def norm(
             if v:
                 to_fetch[k] = f"art_tuple[1]['uri'].udf['{v}']"
 
-        df = zika_utils.fetch_sample_data(currentStep, to_fetch)
+        df = zika.utils.fetch_sample_data(currentStep, to_fetch)
 
         conc_unit = "ng/ul" if use_customer_metrics else df.conc_units[0]
         amt_unit = "ng" if conc_unit == "ng/ul" else "fmol"
@@ -789,34 +789,34 @@ def norm(
         wl_comments = []
 
         # Resolve buffer transfers
-        df_buffer, wl_comments = zika_utils.resolve_buffer_transfers(
+        df_buffer, wl_comments = zika.utils.resolve_buffer_transfers(
             df=df.copy(), wl_comments=wl_comments
         )
 
         # Format worklist
-        df_formatted = zika_utils.format_worklist(df_buffer.copy(), deck=deck)
+        df_formatted = zika.utils.format_worklist(df_buffer.copy(), deck=deck)
         wl_comments.append(
             f"This worklist will enact normalization of {len(df)} samples. For detailed parameters see the worklist log"
         )
 
         # Write files
 
-        wl_filename, log_filename = zika_utils.get_filenames(
+        wl_filename, log_filename = zika.utils.get_filenames(
             method_name="norm", pid=currentStep.id
         )
 
-        zika_utils.write_worklist(
+        zika.utils.write_worklist(
             df=df_formatted.copy(),
             deck=deck,
             wl_filename=wl_filename,
             comments=wl_comments,
         )
 
-        zika_utils.write_log(log, log_filename)
+        zika.utils.write_log(log, log_filename)
 
         # Upload files
-        zika_utils.upload_csv(currentStep, lims, wl_filename)
-        zika_utils.upload_log(currentStep, lims, log_filename)
+        zika.utils.upload_csv(currentStep, lims, wl_filename)
+        zika.utils.upload_log(currentStep, lims, log_filename)
 
         # Issue warnings, if any
         if any("WARNING" in entry for entry in log):
diff --git a/scripts/ont_pool.py b/scripts/ont_pool.py
index 076dc8e4..5835d31d 100644
--- a/scripts/ont_pool.py
+++ b/scripts/ont_pool.py
@@ -9,9 +9,9 @@
 from genologics.lims import Lims
 from numpy import minimum
 from tabulate import tabulate
-from zika_utils import fetch_sample_data
 
 from scilifelab_epps.utils import formula
+from scilifelab_epps.zika.utils import fetch_sample_data
 
 DESC = """
 EPP "ONT pooling", file slot "ONT pooling log".

From 774f3313338f631098dc69d7f50ec736acf5b95a Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 14:37:54 +0200
Subject: [PATCH 18/50] ruff safe check: re-sort imports

---
 scripts/molar_concentration.py   | 2 +-
 scripts/parse_ba_results.py      | 2 +-
 scripts/qc_amount_calculation.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/molar_concentration.py b/scripts/molar_concentration.py
index e7603da0..57af5238 100644
--- a/scripts/molar_concentration.py
+++ b/scripts/molar_concentration.py
@@ -16,8 +16,8 @@
 from genologics.entities import Process
 from genologics.lims import Lims
 
-from scilifelab_epps.utils.formula import ng_ul_to_nM
 from scilifelab_epps.epp import EppLogger
+from scilifelab_epps.utils.formula import ng_ul_to_nM
 
 
 def apply_calculations(lims, artifacts, conc_udf, size_udf, unit_udf, epp_logger):
diff --git a/scripts/parse_ba_results.py b/scripts/parse_ba_results.py
index 78f96465..3f437148 100644
--- a/scripts/parse_ba_results.py
+++ b/scripts/parse_ba_results.py
@@ -11,8 +11,8 @@
 from genologics.entities import Process
 from genologics.lims import Lims
 
-from scilifelab_epps.utils import udf_tools
 from scilifelab_epps.epp import get_well_number
+from scilifelab_epps.utils import udf_tools
 
 DESC = """This script parses the Agilent BioAnalyzer XML report.
 
diff --git a/scripts/qc_amount_calculation.py b/scripts/qc_amount_calculation.py
index ab39568c..2e258259 100644
--- a/scripts/qc_amount_calculation.py
+++ b/scripts/qc_amount_calculation.py
@@ -16,8 +16,8 @@
 from genologics.entities import Process
 from genologics.lims import Lims
 
-from scilifelab_epps.utils import formula, udf_tools
 from scilifelab_epps.epp import EppLogger
+from scilifelab_epps.utils import formula, udf_tools
 
 
 def apply_calculations(artifacts, udf1, op, udf2, unit_amount_map, process):

From 0cd452c37913d5ba252ab5e1afbc9ea44b2cee84 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 14:44:50 +0200
Subject: [PATCH 19/50] bump version log to 20240820.1

---
 VERSIONLOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/VERSIONLOG.md b/VERSIONLOG.md
index 8c5d0a20..9d1d3303 100644
--- a/VERSIONLOG.md
+++ b/VERSIONLOG.md
@@ -1,5 +1,9 @@
 # Scilifelab_epps Version Log
 
+## 20240820.1
+
+Re-organize repo to follow best-practice modularization and implement EPP wrapper.
+
 ## 20240816.1
 
 Set up fixed-volume pooling by Zika for no-QC libraries.

From 8c2991b73a047e363d861a3daf9444e146d488e8 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 16:10:38 +0200
Subject: [PATCH 20/50] add initial skeleton of AVITI run manifest script

---
 scripts/generate_aviti_run_manifest.py | 53 ++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 scripts/generate_aviti_run_manifest.py

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
new file mode 100644
index 00000000..33da90fb
--- /dev/null
+++ b/scripts/generate_aviti_run_manifest.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+import logging
+import os
+import re
+import shutil
+from argparse import ArgumentParser
+from datetime import datetime as dt
+
+from genologics.config import BASEURI, PASSWORD, USERNAME
+from genologics.entities import Process
+from genologics.lims import Lims
+
+from scilifelab_epps.wrapper import epp_decorator
+
+DESC = """Script to generate Anglerfish samplesheet for ONT runs.
+"""
+
+TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S")
+
+
+@epp_decorator(script_path=__file__, timestamp=TIMESTAMP)
+def main(args):
+    lims = Lims(BASEURI, USERNAME, PASSWORD)
+    process = Process(lims, id=args.pid)
+
+    pass
+
+
+if __name__ == "__main__":
+    # Parse args
+    parser = ArgumentParser(description=DESC)
+    parser.add_argument(
+        "--pid",
+        required=True,
+        type=str,
+        help="Lims ID for current Process.",
+    )
+    parser.add_argument(
+        "--log",
+        required=True,
+        type=str,
+        help="Which file slot to use for the script log.",
+    )
+    parser.add_argument(
+        "--file",
+        required=True,
+        type=str,
+        help="Which file slot to use for the run manifest.",
+    )
+    args = parser.parse_args()
+
+    main()

From 58fe48c5638ce6b5a5a7924f526fd5300e9d1de5 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 16:11:36 +0200
Subject: [PATCH 21/50] bugfix: pass parsed args to main()

---
 scripts/generate_aviti_run_manifest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 33da90fb..b59da68a 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -50,4 +50,4 @@ def main(args):
     )
     args = parser.parse_args()
 
-    main()
+    main(args)

From 1cf5a67d9463a03c10093a943ea99b7f20aac685 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 17:42:45 +0200
Subject: [PATCH 22/50] add Row, Section and Manifest classes for run manifest

---
 scripts/generate_aviti_run_manifest.py | 72 ++++++++++++++++++++++++--
 1 file changed, 69 insertions(+), 3 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index b59da68a..8afb4990 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -1,10 +1,8 @@
 #!/usr/bin/env python
 
 import logging
-import os
-import re
-import shutil
 from argparse import ArgumentParser
+from dataclasses import dataclass, field
 from datetime import datetime as dt
 
 from genologics.config import BASEURI, PASSWORD, USERNAME
@@ -19,11 +17,79 @@
 TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S")
 
 
+class Row:
+    def __init__(self, **kwargs):
+        for k, v in kwargs.items():
+            setattr(self, k, v)
+
+    def write(self, f):
+        for attr in self.__dict__.values():
+            if isinstance(attr, str) and "," in attr:
+                f.write(f'"{attr}", ')
+            else:
+                f.write(f"{attr}, ")
+        f.write("\n")
+
+
+class Section:
+    def __init__(self) -> None:
+        self.rows: list[Row] = []
+
+    def add(self, row: Row):
+        self.rows.append(row)
+
+    def write(self, f):
+        f.write(f"{self.mark_start}\n")
+        for row in self.rows:
+            row.write(f)
+        f.write("\n")
+
+
+class RunValues(Section):
+    def __init__(self) -> None:
+        super().__init__()
+        self.mark_start: str = "[Run Values]"
+        self.cols: list[str] = ["KeyName", "Value"]
+
+
+class Settings(Section):
+    def __init__(self) -> None:
+        super().__init__()
+        self.mark_start: str = "[Settings]"
+        self.cols: list[str] = ["SettingName", "Value"]
+
+
+class Samples(Section):
+    def __init__(self) -> None:
+        super().__init__()
+        self.mark_start: str = "[Samples]"
+        self.cols: list[str] = [
+            "SampleName",
+            "Index1",
+            "Index2",
+            "Lane",
+            "Project",
+            "ExternalID",
+        ]
+
+
+class Manifest:
+    def __init__(self) -> None:
+        self.sections: list[Section] = [RunValues(), Settings(), Samples()]
+
+    def write(self, file_path: str):
+        with open(file_path, "w") as f:
+            for section in self.sections:
+                section.write(f)
+
+
 @epp_decorator(script_path=__file__, timestamp=TIMESTAMP)
 def main(args):
     lims = Lims(BASEURI, USERNAME, PASSWORD)
     process = Process(lims, id=args.pid)
 
+    logging.info("Starting to build run manifest.")
+
     pass
 
 

From cec8ace8632565cacd9e465187337ecf5ef8dda3 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 20 Aug 2024 17:56:15 +0200
Subject: [PATCH 23/50] use dataclass to get __repr__ for FREE

---
 scripts/generate_aviti_run_manifest.py | 36 ++++++++++++++------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 8afb4990..522287ec 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -17,6 +17,7 @@
 TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S")
 
 
+@dataclass
 class Row:
     def __init__(self, **kwargs):
         for k, v in kwargs.items():
@@ -31,9 +32,9 @@ def write(self, f):
         f.write("\n")
 
 
+@dataclass
 class Section:
-    def __init__(self) -> None:
-        self.rows: list[Row] = []
+    rows: list[Row] = field(default_factory=list)
 
     def add(self, row: Row):
         self.rows.append(row)
@@ -45,25 +46,23 @@ def write(self, f):
         f.write("\n")
 
 
+@dataclass
 class RunValues(Section):
-    def __init__(self) -> None:
-        super().__init__()
-        self.mark_start: str = "[Run Values]"
-        self.cols: list[str] = ["KeyName", "Value"]
+    mark_start: str = "[Run Values]"
+    cols: list[str] = field(default_factory=lambda: ["KeyName", "Value"])
 
 
+@dataclass
 class Settings(Section):
-    def __init__(self) -> None:
-        super().__init__()
-        self.mark_start: str = "[Settings]"
-        self.cols: list[str] = ["SettingName", "Value"]
+    mark_start: str = "[Settings]"
+    cols: list[str] = field(default_factory=lambda: ["SettingName", "Value"])
 
 
+@dataclass
 class Samples(Section):
-    def __init__(self) -> None:
-        super().__init__()
-        self.mark_start: str = "[Samples]"
-        self.cols: list[str] = [
+    mark_start: str = "[Samples]"
+    cols: list[str] = field(
+        default_factory=lambda: [
             "SampleName",
             "Index1",
             "Index2",
@@ -71,15 +70,18 @@ def __init__(self) -> None:
             "Project",
             "ExternalID",
         ]
+    )
 
 
+@dataclass
 class Manifest:
-    def __init__(self) -> None:
-        self.sections: list[Section] = [RunValues(), Settings(), Samples()]
+    runvalues: RunValues = RunValues()
+    settings: Settings = Settings()
+    samples: Samples = Samples()
 
     def write(self, file_path: str):
         with open(file_path, "w") as f:
-            for section in self.sections:
+            for section in [self.runvalues, self.settings, self.samples]:
                 section.write(f)
 
 

From 7602cb89cd2dedc1279cb86bdf2679bb9c0157fc Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Wed, 21 Aug 2024 11:51:40 +0200
Subject: [PATCH 24/50] read sample info

---
 scripts/generate_aviti_run_manifest.py | 57 +++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 6 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 522287ec..b38585fb 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -5,14 +5,13 @@
 from dataclasses import dataclass, field
 from datetime import datetime as dt
 
+import pandas as pd
 from genologics.config import BASEURI, PASSWORD, USERNAME
 from genologics.entities import Process
 from genologics.lims import Lims
 
 from scilifelab_epps.wrapper import epp_decorator
-
-DESC = """Script to generate Anglerfish samplesheet for ONT runs.
-"""
+from scripts.generate_minknow_samplesheet import get_pool_sample_label_mapping
 
 TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S")
 
@@ -40,7 +39,8 @@ def add(self, row: Row):
         self.rows.append(row)
 
     def write(self, f):
-        f.write(f"{self.mark_start}\n")
+        f.write(self.mark_start + "\n")
+        f.write(", ".join(self.cols) + "\n")
         for row in self.rows:
             row.write(f)
         f.write("\n")
@@ -92,12 +92,57 @@ def main(args):
 
     logging.info("Starting to build run manifest.")
 
-    pass
+    # Get the analytes placed into the flowcell
+    arts_out = [op for op in process.all_outputs() if op.type == "Analyte"]
+
+    # Iterate over pools
+    rows = []
+    for art_out in arts_out:
+        assert (
+            art_out.container.type.name == "AVITI Flow Cell"
+        ), "Unsupported container type."
+        assert (
+            len(art_out.samples) > 1 and len(art_out.reagent_labels) > 1
+        ), "Not a pool."
+        assert len(art_out.samples) == len(
+            art_out.reagent_labels
+        ), "Unequal number of samples and reagent labels."
+
+        lane: str = art_out.location[1].split(":")[1]
+        sample2label: dict[str, str] = get_pool_sample_label_mapping(art_out)
+        samples = art_out.samples
+        labels = art_out.reagent_labels
+
+        assert len(labels.unique()) == len(
+            labels
+        ), "Detected non-unique reagent labels."
+
+        # Iterate over samples
+
+        for sample in samples:
+            lims_label = sample2label[sample.name]
+
+            if "-" in lims_label:
+                index1, index2 = lims_label.split("-")
+            else:
+                index1 = lims_label
+                index2 = None
+
+            row = {}
+            row["SampleName"] = sample.name
+            row["Index1"] = index1
+            row["Index2"] = index2
+            row["Lane"] = lane
+
+            rows.append(row)
+
+    df = pd.DataFrame(rows)
+    samples = f"[Samples]\n{df.to_csv(index=None, header=True)}"
 
 
 if __name__ == "__main__":
     # Parse args
-    parser = ArgumentParser(description=DESC)
+    parser = ArgumentParser()
     parser.add_argument(
         "--pid",
         required=True,

From b74cc367bc08a46043d9a14c4676ceb4d48c693e Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Wed, 21 Aug 2024 15:51:00 +0200
Subject: [PATCH 25/50] wip, samples section seems done

---
 scripts/generate_aviti_run_manifest.py | 96 +++++++++++++++++++++-----
 1 file changed, 80 insertions(+), 16 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index b38585fb..0f8056e2 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 import logging
-from argparse import ArgumentParser
+from argparse import ArgumentParser, Namespace
 from dataclasses import dataclass, field
 from datetime import datetime as dt
 
@@ -85,19 +85,21 @@ def write(self, file_path: str):
                 section.write(f)
 
 
-@epp_decorator(script_path=__file__, timestamp=TIMESTAMP)
-def main(args):
-    lims = Lims(BASEURI, USERNAME, PASSWORD)
-    process = Process(lims, id=args.pid)
-
-    logging.info("Starting to build run manifest.")
+def get_samples_section(process: Process) -> str:
+    """Generate the [Samples] section of the AVITI run manifest and return it as a string."""
 
     # Get the analytes placed into the flowcell
     arts_out = [op for op in process.all_outputs() if op.type == "Analyte"]
 
+    # Assert that both flowcell lanes are filled
+    assert set([art_out.location[1].split(":")[1] for art_out in arts_out]) == set(
+        ["1", "2"]
+    ), "Expected two populated lanes."
+
     # Iterate over pools
-    rows = []
+    all_rows = []
     for art_out in arts_out:
+        lane_rows = []
         assert (
             art_out.container.type.name == "AVITI Flow Cell"
         ), "Unsupported container type."
@@ -107,18 +109,14 @@ def main(args):
         assert len(art_out.samples) == len(
             art_out.reagent_labels
         ), "Unequal number of samples and reagent labels."
-
         lane: str = art_out.location[1].split(":")[1]
         sample2label: dict[str, str] = get_pool_sample_label_mapping(art_out)
         samples = art_out.samples
         labels = art_out.reagent_labels
 
-        assert len(labels.unique()) == len(
-            labels
-        ), "Detected non-unique reagent labels."
+        assert len(set(labels)) == len(labels), "Detected non-unique reagent labels."
 
         # Iterate over samples
-
         for sample in samples:
             lims_label = sample2label[sample.name]
 
@@ -134,10 +132,76 @@ def main(args):
             row["Index2"] = index2
             row["Lane"] = lane
 
-            rows.append(row)
+            lane_rows.append(row)
+
+        # Add PhiX controls
+        for phix_idx_pair in [
+            ("ACGTGTAGC", "GCTAGTGCA"),
+            ("CACATGCTG", "AGACACTGT"),
+            ("GTACACGAT", "CTCGTACAG"),
+            ("TGTGCATCA", "TAGTCGATC"),
+        ]:
+            row = {}
+            row["SampleName"] = "PhiX"
+            row["Index1"] = phix_idx_pair[0]
+            row["Index2"] = phix_idx_pair[1]
+            row["Lane"] = lane
+            lane_rows.append(row)
+
+        # Check for index collision within lane, across samples and PhiX
+        check_index_collision(lane_rows)
+        all_rows.extend(lane_rows)
+
+    df = pd.DataFrame(all_rows)
+
+    samples_section = f"[Samples]\n{df.to_csv(index=None, header=True)}"
+
+    return samples_section
+
+
+def revcomp(seq: str) -> str:
+    """Reverse-complement a DNA string."""
+    return seq.translate(str.maketrans("ACGT", "TGCA"))[::-1]
+
+
+def check_index_collision(rows: list[dict]) -> None:
+    """Directionality-agnostic index collision checker."""
+
+    def idx_combinations(idx1: str, idx2: str | None) -> list[str]:
+        """Given one or two indices, return all possible reverse-complement combinations."""
+        if idx2 is None:
+            return [idx1, revcomp(idx1)]
+        else:
+            return [
+                idx1 + idx2,
+                idx1 + revcomp(idx2),
+                revcomp(idx1) + idx2,
+                revcomp(idx1) + revcomp(idx2),
+            ]
+
+    for i in range(len(rows)):
+        row = rows[i]
+        idxs = idx_combinations(row["Index1"], row["Index2"])
+
+        for row_comp in rows[i + 1 :]:
+            idxs_comp = idx_combinations(row_comp["Index1"], row_comp["Index2"])
+
+            if any(idx in idxs_comp for idx in idxs):
+                raise ValueError(
+                    "Index collision detected between"
+                    + f" {row['SampleName']} ({row['Index1']}-{row['Index2']}) and"
+                    + f" {row_comp['SampleName']} ({row_comp['Index1']}-{row_comp['Index2']})."
+                )
+
+
+@epp_decorator(script_path=__file__, timestamp=TIMESTAMP)
+def main(args: Namespace):
+    lims = Lims(BASEURI, USERNAME, PASSWORD)
+    process = Process(lims, id=args.pid)
+
+    logging.info("Starting to build run manifest.")
 
-    df = pd.DataFrame(rows)
-    samples = f"[Samples]\n{df.to_csv(index=None, header=True)}"
+    samples_section = get_samples_section(process)
 
 
 if __name__ == "__main__":

From 20731398ee85283c21d576af49ed973aaa39c518 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Wed, 21 Aug 2024 15:52:24 +0200
Subject: [PATCH 26/50] add todo

---
 scripts/generate_aviti_run_manifest.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 0f8056e2..68e4b9be 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -120,6 +120,8 @@ def get_samples_section(process: Process) -> str:
         for sample in samples:
             lims_label = sample2label[sample.name]
 
+            # TODO add code here to parse reagent labels that do not only consist of sequences and dashes
+
             if "-" in lims_label:
                 index1, index2 = lims_label.split("-")
             else:

From dda8fda245c56013c6390861da6a76c765826c50 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Wed, 21 Aug 2024 16:27:42 +0200
Subject: [PATCH 27/50] add levenshtein module for edit distance

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index f3d74068..8cd3ba49 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,6 +4,7 @@ genologics
 google_api_python_client
 httplib2
 interop
+levenshtein
 Markdown
 numpy
 oauth2client

From f02dadda0b03021e8951d1b730bb72a101c38670 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Wed, 21 Aug 2024 16:27:55 +0200
Subject: [PATCH 28/50] checkpoint, probably broken: add edit distance warnings

---
 scripts/generate_aviti_run_manifest.py | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 68e4b9be..3d9c28b2 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -9,6 +9,7 @@
 from genologics.config import BASEURI, PASSWORD, USERNAME
 from genologics.entities import Process
 from genologics.lims import Lims
+from Levenshtein import distance
 
 from scilifelab_epps.wrapper import epp_decorator
 from scripts.generate_minknow_samplesheet import get_pool_sample_label_mapping
@@ -166,7 +167,7 @@ def revcomp(seq: str) -> str:
     return seq.translate(str.maketrans("ACGT", "TGCA"))[::-1]
 
 
-def check_index_collision(rows: list[dict]) -> None:
+def check_index_collision(rows: list[dict], warning_dist: int = 3) -> None:
     """Directionality-agnostic index collision checker."""
 
     def idx_combinations(idx1: str, idx2: str | None) -> list[str]:
@@ -188,6 +189,26 @@ def idx_combinations(idx1: str, idx2: str | None) -> list[str]:
         for row_comp in rows[i + 1 :]:
             idxs_comp = idx_combinations(row_comp["Index1"], row_comp["Index2"])
 
+            for idx in idxs:
+                for idx_comp in idxs_comp:
+                    dist = distance(idx, idx_comp)
+                    if dist <= warning_dist:
+                        warning = "\n".join(
+                            [
+                                f"Edit distance between {row['SampleName']} and {row_comp['SampleName']} indices is {dist}.",
+                                f" The warning threshold is {warning_dist}.",
+                                "Supplied indexes:",
+                                f" {row['SampleName']}: {row['Index1']}-{row['Index2']}",
+                                f" {row_comp['SampleName']}: {row_comp['Index1']}-{row_comp['Index2']}",
+                                "Comparison:",
+                                f" {row['SampleName']}: {idx}",
+                                f" {row_comp['SampleName']}: {idx_comp}",
+                            ]
+                        )
+                        logging.warning(warning)
+                        # TODO
+                        print(warning)
+
             if any(idx in idxs_comp for idx in idxs):
                 raise ValueError(
                     "Index collision detected between"

From 94f9cf870b04c722c71c3e22dddd4c9c0a49a71e Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Wed, 21 Aug 2024 18:18:52 +0200
Subject: [PATCH 29/50] made two functions to very thoroughly check distance,
 maybe too thorough. Up for discussion.

---
 scripts/generate_aviti_run_manifest.py | 134 +++++++++++++++++--------
 1 file changed, 90 insertions(+), 44 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 3d9c28b2..63bf6a88 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -9,7 +9,7 @@
 from genologics.config import BASEURI, PASSWORD, USERNAME
 from genologics.entities import Process
 from genologics.lims import Lims
-from Levenshtein import distance
+from Levenshtein import hamming as distance
 
 from scilifelab_epps.wrapper import epp_decorator
 from scripts.generate_minknow_samplesheet import get_pool_sample_label_mapping
@@ -127,7 +127,7 @@ def get_samples_section(process: Process) -> str:
                 index1, index2 = lims_label.split("-")
             else:
                 index1 = lims_label
-                index2 = None
+                index2 = ""
 
             row = {}
             row["SampleName"] = sample.name
@@ -152,7 +152,7 @@ def get_samples_section(process: Process) -> str:
             lane_rows.append(row)
 
         # Check for index collision within lane, across samples and PhiX
-        check_index_collision(lane_rows)
+        check_distances(lane_rows)
         all_rows.extend(lane_rows)
 
     df = pd.DataFrame(all_rows)
@@ -167,54 +167,100 @@ def revcomp(seq: str) -> str:
     return seq.translate(str.maketrans("ACGT", "TGCA"))[::-1]
 
 
-def check_index_collision(rows: list[dict], warning_dist: int = 3) -> None:
-    """Directionality-agnostic index collision checker."""
+def check_pair_distance(row, row_comp, dist_warning_threshold: int = 2):
+    """Directionality-agnostic distance check between two index pairs."""
 
-    def idx_combinations(idx1: str, idx2: str | None) -> list[str]:
-        """Given one or two indices, return all possible reverse-complement combinations."""
-        if idx2 is None:
-            return [idx1, revcomp(idx1)]
-        else:
-            return [
-                idx1 + idx2,
-                idx1 + revcomp(idx2),
-                revcomp(idx1) + idx2,
-                revcomp(idx1) + revcomp(idx2),
+    def get_index_combos(row):
+        return set(
+            [
+                row["Index1"] + row["Index2"],
+                row["Index1"] + revcomp(row["Index2"]),
+                revcomp(row["Index1"]) + row["Index2"],
+                revcomp(row["Index1"]) + revcomp(row["Index2"]),
             ]
+        )
+
+    row_combos = get_index_combos(row)
+    row_comp_combos = get_index_combos(row_comp)
+
+    for row_combo in row_combos:
+        for row_comp_combo in row_comp_combos:
+            dist = distance(row_combo, row_comp_combo)
+
+            if dist <= dist_warning_threshold:
+                warning = "\n".join(
+                    [
+                        f"Edit distance between {row['SampleName']} and {row_comp['SampleName']} indices is {dist}.",
+                        f" The warning threshold is {dist_warning_threshold}.",
+                        "Supplied indexes:",
+                        f" {row['SampleName']}: {row['Index1']}-{row['Index2']}",
+                        f" {row_comp['SampleName']}: {row_comp['Index1']}-{row_comp['Index2']}",
+                        "Comparison:",
+                        f" {row['SampleName']}: {row_combo}",
+                        f" {row_comp['SampleName']}: {row_comp_combo}",
+                    ]
+                )
+                logging.warning(warning)
+                if dist == 0:
+                    raise AssertionError("Index collision detected.")
+
+
+def check_pair_distance_new(row, row_comp, dist_warning_threshold: int = 2):
+    """Directionality-agnostic distance check between two index pairs."""
+    dists = []
+    for a1, _a1 in zip(
+        [row["Index1"], revcomp(row["Index1"])], ["Index1", "Index1_rc"]
+    ):
+        for a2, _a2 in zip(
+            [row["Index2"], revcomp(row["Index2"])], ["Index2", "Index2_rc"]
+        ):
+            for b1, _b1 in zip(
+                [row_comp["Index1"], revcomp(row_comp["Index1"])],
+                ["Index1", "Index1_rc"],
+            ):
+                for b2, _b2 in zip(
+                    [row_comp["Index2"], revcomp(row_comp["Index2"])],
+                    ["Index2", "Index2_rc"],
+                ):
+                    dists.append(
+                        (
+                            distance(a1, b1) + distance(a2, b2),
+                            f"{a1}-{a2} {b1}-{b2}",
+                            f"{_a1}-{_a2} {_b1}-{_b2}",
+                        )
+                    )
+    min_dist = min(dists, key=lambda x: x[0])
+
+    if min_dist[0] <= dist_warning_threshold:
+        print(f"{row['SampleName']} <--> {row_comp['SampleName']}")
+        print(
+            f"Given: {row['Index1']}-{row['Index2']} <--> {row_comp['Index1']}-{row_comp['Index2']}"
+        )
+        print(f"Distance: {min_dist[0]} when flipped to {min_dist[2]}")
+        print_match(*min_dist[1].split())
+        print()
+
+
+def print_match(seq1, seq2):
+    assert len(seq1) == len(seq2)
+
+    m = ""
+    for seq1_base, seq2_base in zip(seq1, seq2):
+        if seq1_base == seq2_base:
+            m += "|"
+        else:
+            m += "X"
 
+    lines = "\n".join([seq1, m, seq2])
+    print(lines)
+
+
+def check_distances(rows: list[dict]) -> None:
     for i in range(len(rows)):
         row = rows[i]
-        idxs = idx_combinations(row["Index1"], row["Index2"])
 
         for row_comp in rows[i + 1 :]:
-            idxs_comp = idx_combinations(row_comp["Index1"], row_comp["Index2"])
-
-            for idx in idxs:
-                for idx_comp in idxs_comp:
-                    dist = distance(idx, idx_comp)
-                    if dist <= warning_dist:
-                        warning = "\n".join(
-                            [
-                                f"Edit distance between {row['SampleName']} and {row_comp['SampleName']} indices is {dist}.",
-                                f" The warning threshold is {warning_dist}.",
-                                "Supplied indexes:",
-                                f" {row['SampleName']}: {row['Index1']}-{row['Index2']}",
-                                f" {row_comp['SampleName']}: {row_comp['Index1']}-{row_comp['Index2']}",
-                                "Comparison:",
-                                f" {row['SampleName']}: {idx}",
-                                f" {row_comp['SampleName']}: {idx_comp}",
-                            ]
-                        )
-                        logging.warning(warning)
-                        # TODO
-                        print(warning)
-
-            if any(idx in idxs_comp for idx in idxs):
-                raise ValueError(
-                    "Index collision detected between"
-                    + f" {row['SampleName']} ({row['Index1']}-{row['Index2']}) and"
-                    + f" {row_comp['SampleName']} ({row_comp['Index1']}-{row_comp['Index2']})."
-                )
+            check_pair_distance_new(row, row_comp)
 
 
 @epp_decorator(script_path=__file__, timestamp=TIMESTAMP)

From 1398db9e550f43e5c4120b03f28ef3c8f71aa15e Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Thu, 22 Aug 2024 09:41:14 +0200
Subject: [PATCH 30/50] make rc-flips optional in index check

---
 scripts/generate_aviti_run_manifest.py | 125 +++++++++++--------------
 1 file changed, 56 insertions(+), 69 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 63bf6a88..f494aa26 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -167,81 +167,68 @@ def revcomp(seq: str) -> str:
     return seq.translate(str.maketrans("ACGT", "TGCA"))[::-1]
 
 
-def check_pair_distance(row, row_comp, dist_warning_threshold: int = 2):
-    """Directionality-agnostic distance check between two index pairs."""
-
-    def get_index_combos(row):
-        return set(
-            [
-                row["Index1"] + row["Index2"],
-                row["Index1"] + revcomp(row["Index2"]),
-                revcomp(row["Index1"]) + row["Index2"],
-                revcomp(row["Index1"]) + revcomp(row["Index2"]),
-            ]
-        )
-
-    row_combos = get_index_combos(row)
-    row_comp_combos = get_index_combos(row_comp)
-
-    for row_combo in row_combos:
-        for row_comp_combo in row_comp_combos:
-            dist = distance(row_combo, row_comp_combo)
-
-            if dist <= dist_warning_threshold:
-                warning = "\n".join(
-                    [
-                        f"Edit distance between {row['SampleName']} and {row_comp['SampleName']} indices is {dist}.",
-                        f" The warning threshold is {dist_warning_threshold}.",
-                        "Supplied indexes:",
-                        f" {row['SampleName']}: {row['Index1']}-{row['Index2']}",
-                        f" {row_comp['SampleName']}: {row_comp['Index1']}-{row_comp['Index2']}",
-                        "Comparison:",
-                        f" {row['SampleName']}: {row_combo}",
-                        f" {row_comp['SampleName']}: {row_comp_combo}",
-                    ]
-                )
-                logging.warning(warning)
-                if dist == 0:
-                    raise AssertionError("Index collision detected.")
-
-
-def check_pair_distance_new(row, row_comp, dist_warning_threshold: int = 2):
-    """Directionality-agnostic distance check between two index pairs."""
-    dists = []
-    for a1, _a1 in zip(
-        [row["Index1"], revcomp(row["Index1"])], ["Index1", "Index1_rc"]
-    ):
-        for a2, _a2 in zip(
-            [row["Index2"], revcomp(row["Index2"])], ["Index2", "Index2_rc"]
+def check_pair_distance(
+    row, row_comp, check_flips: bool = False, dist_warning_threshold: int = 3
+):
+    """Distance check between two index pairs.
+
+    row                     dict   manifest row of sample A
+    row_comp                dict   manifest row of sample B
+    check_flips             bool   check all reverse-complement combinations
+    dist_warning_threshold  int    trigger warning for distances at or below this value
+
+    """
+
+    if check_flips:
+        flips = []
+        for a1, _a1 in zip(
+            [row["Index1"], revcomp(row["Index1"])], ["Index1", "Index1_rc"]
         ):
-            for b1, _b1 in zip(
-                [row_comp["Index1"], revcomp(row_comp["Index1"])],
-                ["Index1", "Index1_rc"],
+            for a2, _a2 in zip(
+                [row["Index2"], revcomp(row["Index2"])], ["Index2", "Index2_rc"]
             ):
-                for b2, _b2 in zip(
-                    [row_comp["Index2"], revcomp(row_comp["Index2"])],
-                    ["Index2", "Index2_rc"],
+                for b1, _b1 in zip(
+                    [row_comp["Index1"], revcomp(row_comp["Index1"])],
+                    ["Index1", "Index1_rc"],
                 ):
-                    dists.append(
-                        (
-                            distance(a1, b1) + distance(a2, b2),
-                            f"{a1}-{a2} {b1}-{b2}",
-                            f"{_a1}-{_a2} {_b1}-{_b2}",
+                    for b2, _b2 in zip(
+                        [row_comp["Index2"], revcomp(row_comp["Index2"])],
+                        ["Index2", "Index2_rc"],
+                    ):
+                        flips.append(
+                            (
+                                distance(a1, b1) + distance(a2, b2),
+                                f"{a1}-{a2} {b1}-{b2}",
+                                f"{_a1}-{_a2} {_b1}-{_b2}",
+                            )
                         )
-                    )
-    min_dist = min(dists, key=lambda x: x[0])
+        dist, compared_seqs, flip_conf = min(flips, key=lambda x: x[0])
 
-    if min_dist[0] <= dist_warning_threshold:
-        print(f"{row['SampleName']} <--> {row_comp['SampleName']}")
-        print(
-            f"Given: {row['Index1']}-{row['Index2']} <--> {row_comp['Index1']}-{row_comp['Index2']}"
+    else:
+        dist = distance(
+            row["Index1"] + row["Index2"], row_comp["Index1"] + row_comp["Index2"]
+        )
+        compared_seqs = (
+            f"{row['Index1']}-{row['Index2']} {row_comp['Index1']}-{row_comp['Index2']}"
         )
-        print(f"Distance: {min_dist[0]} when flipped to {min_dist[2]}")
-        print_match(*min_dist[1].split())
-        print()
 
+    if dist <= dist_warning_threshold:
+        warning_lines = [
+            f"Hamming distance {dist} between {row['SampleName']} and {row_comp['SampleName']}"
+        ]
+        if check_flips:
+            warning_lines.append(
+                f"Given: {row['Index1']}-{row['Index2']} <-> {row_comp['Index1']}-{row_comp['Index2']}"
+            )
+            warning_lines.append(f"Distance: {dist} when flipped to {flip_conf}")
+        warning_lines.append(visualize_hamming(*compared_seqs.split()))
+        warning = "\n".join(warning_lines)
+        logging.warning(warning)
+
+
+def visualize_hamming(seq1: str, seq2: str) -> str:
+    """Visualize Hamming alignment"""
 
-def print_match(seq1, seq2):
     assert len(seq1) == len(seq2)
 
     m = ""
@@ -252,7 +239,7 @@ def print_match(seq1, seq2):
             m += "X"
 
     lines = "\n".join([seq1, m, seq2])
-    print(lines)
+    return lines
 
 
 def check_distances(rows: list[dict]) -> None:
@@ -260,7 +247,7 @@ def check_distances(rows: list[dict]) -> None:
         row = rows[i]
 
         for row_comp in rows[i + 1 :]:
-            check_pair_distance_new(row, row_comp)
+            check_pair_distance(row, row_comp, dist_warning_threshold=4)
 
 
 @epp_decorator(script_path=__file__, timestamp=TIMESTAMP)

From 628b9314cf0654ab8261c5d56416727fc745509d Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Thu, 22 Aug 2024 09:41:43 +0200
Subject: [PATCH 31/50] remove dataclasses

---
 scripts/generate_aviti_run_manifest.py | 70 --------------------------
 1 file changed, 70 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index f494aa26..68919492 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -2,7 +2,6 @@
 
 import logging
 from argparse import ArgumentParser, Namespace
-from dataclasses import dataclass, field
 from datetime import datetime as dt
 
 import pandas as pd
@@ -17,75 +16,6 @@
 TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S")
 
 
-@dataclass
-class Row:
-    def __init__(self, **kwargs):
-        for k, v in kwargs.items():
-            setattr(self, k, v)
-
-    def write(self, f):
-        for attr in self.__dict__.values():
-            if isinstance(attr, str) and "," in attr:
-                f.write(f'"{attr}", ')
-            else:
-                f.write(f"{attr}, ")
-        f.write("\n")
-
-
-@dataclass
-class Section:
-    rows: list[Row] = field(default_factory=list)
-
-    def add(self, row: Row):
-        self.rows.append(row)
-
-    def write(self, f):
-        f.write(self.mark_start + "\n")
-        f.write(", ".join(self.cols) + "\n")
-        for row in self.rows:
-            row.write(f)
-        f.write("\n")
-
-
-@dataclass
-class RunValues(Section):
-    mark_start: str = "[Run Values]"
-    cols: list[str] = field(default_factory=lambda: ["KeyName", "Value"])
-
-
-@dataclass
-class Settings(Section):
-    mark_start: str = "[Settings]"
-    cols: list[str] = field(default_factory=lambda: ["SettingName", "Value"])
-
-
-@dataclass
-class Samples(Section):
-    mark_start: str = "[Samples]"
-    cols: list[str] = field(
-        default_factory=lambda: [
-            "SampleName",
-            "Index1",
-            "Index2",
-            "Lane",
-            "Project",
-            "ExternalID",
-        ]
-    )
-
-
-@dataclass
-class Manifest:
-    runvalues: RunValues = RunValues()
-    settings: Settings = Settings()
-    samples: Samples = Samples()
-
-    def write(self, file_path: str):
-        with open(file_path, "w") as f:
-            for section in [self.runvalues, self.settings, self.samples]:
-                section.write(f)
-
-
 def get_samples_section(process: Process) -> str:
     """Generate the [Samples] section of the AVITI run manifest and return it as a string."""
 

From 6b8f5ce57083f01977e8b6493f7901998bea991e Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Thu, 22 Aug 2024 09:43:02 +0200
Subject: [PATCH 32/50] add hard stop for index collision

---
 scripts/generate_aviti_run_manifest.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 68919492..8f88cc2c 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -154,6 +154,8 @@ def check_pair_distance(
         warning_lines.append(visualize_hamming(*compared_seqs.split()))
         warning = "\n".join(warning_lines)
         logging.warning(warning)
+        if dist == 0:
+            raise AssertionError("Identical indices detected.")
 
 
 def visualize_hamming(seq1: str, seq2: str) -> str:

From e24ababcaf927219120a925a7df6e0a51f747ca4 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Thu, 22 Aug 2024 10:01:10 +0200
Subject: [PATCH 33/50] try fixing reqs

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 8cd3ba49..70438d85 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,13 +4,13 @@ genologics
 google_api_python_client
 httplib2
 interop
-levenshtein
 Markdown
 numpy
 oauth2client
 pandas
 protobuf
 psycopg2
+python_levenshtein
 PyYAML
 Requests
 scilifelab_parsers @ git+https://github.com/SciLifeLab/scilifelab_parsers

From ad183bcc4bab59cd417f38818fd1ea0939379116 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Thu, 22 Aug 2024 10:14:02 +0200
Subject: [PATCH 34/50] improve docs

---
 scripts/generate_aviti_run_manifest.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 8f88cc2c..69cb5d30 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -143,17 +143,26 @@ def check_pair_distance(
         )
 
     if dist <= dist_warning_threshold:
+        # Build a warning message for the pair
         warning_lines = [
             f"Hamming distance {dist} between {row['SampleName']} and {row_comp['SampleName']}"
         ]
+        # If the distance is derived from a flip, show the original and the flipped conformation
         if check_flips:
             warning_lines.append(
                 f"Given: {row['Index1']}-{row['Index2']} <-> {row_comp['Index1']}-{row_comp['Index2']}"
             )
             warning_lines.append(f"Distance: {dist} when flipped to {flip_conf}")
-        warning_lines.append(visualize_hamming(*compared_seqs.split()))
+        # If the index lengths are equal, add a simple small visual representation
+        if len(row["Index1"]) + len(row["Index2"]) == len(row_comp["Index1"]) + len(
+            row_comp["Index2"]
+        ):
+            warning_lines.append(visualize_hamming(*compared_seqs.split()))
+
         warning = "\n".join(warning_lines)
         logging.warning(warning)
+
+        # For identical collisions, kill the process
         if dist == 0:
             raise AssertionError("Identical indices detected.")
 

From 6798734ab3449b4b5dd67b57a7bd22ff5a613680 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Thu, 22 Aug 2024 16:36:27 +0200
Subject: [PATCH 35/50] polishing

---
 scripts/generate_aviti_run_manifest.py | 54 ++++++++++++++++++++++----
 1 file changed, 46 insertions(+), 8 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 69cb5d30..6003a123 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -17,7 +17,7 @@
 
 
 def get_samples_section(process: Process) -> str:
-    """Generate the [Samples] section of the AVITI run manifest and return it as a string."""
+    """Generate the [SAMPLES] section of the AVITI run manifest and return it as a string."""
 
     # Get the analytes placed into the flowcell
     arts_out = [op for op in process.all_outputs() if op.type == "Analyte"]
@@ -87,7 +87,7 @@ def get_samples_section(process: Process) -> str:
 
     df = pd.DataFrame(all_rows)
 
-    samples_section = f"[Samples]\n{df.to_csv(index=None, header=True)}"
+    samples_section = f"[SAMPLES]\n{df.to_csv(index=None, header=True)}"
 
     return samples_section
 
@@ -153,11 +153,11 @@ def check_pair_distance(
                 f"Given: {row['Index1']}-{row['Index2']} <-> {row_comp['Index1']}-{row_comp['Index2']}"
             )
             warning_lines.append(f"Distance: {dist} when flipped to {flip_conf}")
-        # If the index lengths are equal, add a simple small visual representation
+        # If the index lengths are equal, add a simple visual representation
         if len(row["Index1"]) + len(row["Index2"]) == len(row_comp["Index1"]) + len(
             row_comp["Index2"]
         ):
-            warning_lines.append(visualize_hamming(*compared_seqs.split()))
+            warning_lines.append(show_match(*compared_seqs.split()))
 
         warning = "\n".join(warning_lines)
         logging.warning(warning)
@@ -167,8 +167,8 @@ def check_pair_distance(
             raise AssertionError("Identical indices detected.")
 
 
-def visualize_hamming(seq1: str, seq2: str) -> str:
-    """Visualize Hamming alignment"""
+def show_match(seq1: str, seq2: str) -> str:
+    """Visualize base-by-base match between sequences of equal length."""
 
     assert len(seq1) == len(seq2)
 
@@ -183,12 +183,43 @@ def visualize_hamming(seq1: str, seq2: str) -> str:
     return lines
 
 
-def check_distances(rows: list[dict]) -> None:
+def check_distances(rows: list[dict], dist_warning_threshold=3) -> None:
     for i in range(len(rows)):
         row = rows[i]
 
         for row_comp in rows[i + 1 :]:
-            check_pair_distance(row, row_comp, dist_warning_threshold=4)
+            check_pair_distance(
+                row, row_comp, dist_warning_threshold=dist_warning_threshold
+            )
+
+
+def get_runValues_section(process: Process) -> str:
+    """Generate the [RUNVALUES] section of the AVITI run manifest and return it as a string."""
+
+    runValues_section = "\n".join(
+        [
+            "[RUNVALUES]",
+            "KeyName, Value",
+            f"lims_step_name, {process.type.name}",
+            f"lims_step_id, {process.id}",
+            f"lims_step_operator, {process.technician.name}",
+            f"file_timestamp, {TIMESTAMP}",
+        ]
+    )
+
+    return runValues_section
+
+
+def get_settings_section(process) -> str:
+    """Generate the [SETTINGS] section of the AVITI run manifest and return it as a string."""
+    settings_section = "\n".join(
+        [
+            "[SETTINGS]",
+            "SettingName, Value",
+        ]
+    )
+
+    return settings_section
 
 
 @epp_decorator(script_path=__file__, timestamp=TIMESTAMP)
@@ -198,8 +229,15 @@ def main(args: Namespace):
 
     logging.info("Starting to build run manifest.")
 
+    runValues_section = get_runValues_section(process)
+    settings_section = get_settings_section(process)
     samples_section = get_samples_section(process)
 
+    # TODO string sanitation
+    manifest = "\n\n".join([runValues_section, settings_section, samples_section])
+
+    # TODO upload manifest to file slot
+
 
 if __name__ == "__main__":
     # Parse args

From 528db93fb1fdb6e9f421d08e5b516792c9d20578 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Fri, 23 Aug 2024 10:03:30 +0200
Subject: [PATCH 36/50] ready for testing

---
 scripts/generate_aviti_run_manifest.py | 85 ++++++++++++++++++++------
 1 file changed, 68 insertions(+), 17 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 6003a123..bb52bd81 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -1,6 +1,9 @@
 #!/usr/bin/env python
 
 import logging
+import os
+import re
+import shutil
 from argparse import ArgumentParser, Namespace
 from datetime import datetime as dt
 
@@ -10,10 +13,12 @@
 from genologics.lims import Lims
 from Levenshtein import hamming as distance
 
+from scilifelab_epps.epp import upload_file
 from scilifelab_epps.wrapper import epp_decorator
 from scripts.generate_minknow_samplesheet import get_pool_sample_label_mapping
 
 TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S")
+LABEL_SEQ_SUBSTRING = re.compile(r"[ACGT]{4,}(-[ACGT]{4,})?")
 
 
 def get_samples_section(process: Process) -> str:
@@ -22,14 +27,16 @@ def get_samples_section(process: Process) -> str:
     # Get the analytes placed into the flowcell
     arts_out = [op for op in process.all_outputs() if op.type == "Analyte"]
 
-    # Assert that both flowcell lanes are filled
-    assert set([art_out.location[1].split(":")[1] for art_out in arts_out]) == set(
-        ["1", "2"]
-    ), "Expected two populated lanes."
+    # Check whether lanes are individually addressable
+    lanes_used = set([art_out.location[1].split(":")[1] for art_out in arts_out])
+    ungrouped_lanes = True if len(lanes_used) == 2 else False
+    logging.info(f"Individually addressable lanes: {ungrouped_lanes}")
 
     # Iterate over pools
     all_rows = []
     for art_out in arts_out:
+        logging.info(f"Iterating over pool '{art_out.id}'...")
+
         lane_rows = []
         assert (
             art_out.container.type.name == "AVITI Flow Cell"
@@ -40,6 +47,7 @@ def get_samples_section(process: Process) -> str:
         assert len(art_out.samples) == len(
             art_out.reagent_labels
         ), "Unequal number of samples and reagent labels."
+
         lane: str = art_out.location[1].split(":")[1]
         sample2label: dict[str, str] = get_pool_sample_label_mapping(art_out)
         samples = art_out.samples
@@ -51,19 +59,25 @@ def get_samples_section(process: Process) -> str:
         for sample in samples:
             lims_label = sample2label[sample.name]
 
-            # TODO add code here to parse reagent labels that do not only consist of sequences and dashes
+            # Parse sample index
+            label_seq_match = re.search(LABEL_SEQ_SUBSTRING, lims_label)
+            assert (
+                label_seq_match is not None
+            ), f"Could not parse label sequence from {lims_label}"
+            label_seq = label_seq_match.group(0)
 
-            if "-" in lims_label:
-                index1, index2 = lims_label.split("-")
+            if "-" in label_seq:
+                index1, index2 = label_seq.split("-")
             else:
-                index1 = lims_label
+                index1 = label_seq
                 index2 = ""
 
             row = {}
             row["SampleName"] = sample.name
             row["Index1"] = index1
             row["Index2"] = index2
-            row["Lane"] = lane
+            if ungrouped_lanes:
+                row["Lane"] = lane
 
             lane_rows.append(row)
 
@@ -78,7 +92,8 @@ def get_samples_section(process: Process) -> str:
             row["SampleName"] = "PhiX"
             row["Index1"] = phix_idx_pair[0]
             row["Index2"] = phix_idx_pair[1]
-            row["Lane"] = lane
+            if ungrouped_lanes:
+                row["Lane"] = lane
             lane_rows.append(row)
 
         # Check for index collision within lane, across samples and PhiX
@@ -193,16 +208,25 @@ def check_distances(rows: list[dict], dist_warning_threshold=3) -> None:
             )
 
 
-def get_runValues_section(process: Process) -> str:
+def safe_string(s: str) -> str:
+    """Wrap a string in quotes if it contains commas."""
+    if "," in s:
+        return f'"{s}"'
+    else:
+        return s
+
+
+def get_runValues_section(process: Process, file_name: str) -> str:
     """Generate the [RUNVALUES] section of the AVITI run manifest and return it as a string."""
 
     runValues_section = "\n".join(
         [
             "[RUNVALUES]",
             "KeyName, Value",
-            f"lims_step_name, {process.type.name}",
+            f"lims_step_name, {safe_string(process.type.name)}",
             f"lims_step_id, {process.id}",
             f"lims_step_operator, {process.technician.name}",
+            f"file_name, {safe_string(file_name)}",
             f"file_timestamp, {TIMESTAMP}",
         ]
     )
@@ -210,7 +234,7 @@ def get_runValues_section(process: Process) -> str:
     return runValues_section
 
 
-def get_settings_section(process) -> str:
+def get_settings_section() -> str:
     """Generate the [SETTINGS] section of the AVITI run manifest and return it as a string."""
     settings_section = "\n".join(
         [
@@ -227,16 +251,43 @@ def main(args: Namespace):
     lims = Lims(BASEURI, USERNAME, PASSWORD)
     process = Process(lims, id=args.pid)
 
+    file_name = (
+        f"AVITI_run_manifest_{process.id}_{TIMESTAMP}_{process.technician.name}.csv"
+    )
+
+    # Build manifest
     logging.info("Starting to build run manifest.")
 
-    runValues_section = get_runValues_section(process)
-    settings_section = get_settings_section(process)
+    runValues_section = get_runValues_section(process, file_name)
+    settings_section = get_settings_section()
     samples_section = get_samples_section(process)
 
-    # TODO string sanitation
     manifest = "\n\n".join([runValues_section, settings_section, samples_section])
 
-    # TODO upload manifest to file slot
+    # Write manifest
+    with open(file_name, "w") as f:
+        f.write(manifest, encoding="utf-8")
+
+    # Upload manifest
+    logging.info("Uploading run manifest to LIMS...")
+    upload_file(
+        file_name,
+        args.file,
+        process,
+        lims,
+    )
+
+    logging.info("Moving samplesheet to ngi-nas-ns...")
+    try:
+        shutil.copyfile(
+            file_name,
+            f"/srv/ngi-nas-ns/samplesheets/AVITI/{dt.now().year}/{file_name}",
+        )
+        os.remove(file_name)
+    except:
+        logging.error("Failed to move samplesheet to ngi-nas-ns.", exc_info=True)
+    else:
+        logging.info("Samplesheet moved to ngi-nas-ns.")
 
 
 if __name__ == "__main__":

From 20036f898c999bce22b456282d2ba36738167dda Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Fri, 23 Aug 2024 10:05:06 +0200
Subject: [PATCH 37/50] fix name

---
 scripts/generate_aviti_run_manifest.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index bb52bd81..99c54278 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -277,7 +277,7 @@ def main(args: Namespace):
         lims,
     )
 
-    logging.info("Moving samplesheet to ngi-nas-ns...")
+    logging.info("Moving run manifest to ngi-nas-ns...")
     try:
         shutil.copyfile(
             file_name,
@@ -285,9 +285,9 @@ def main(args: Namespace):
         )
         os.remove(file_name)
     except:
-        logging.error("Failed to move samplesheet to ngi-nas-ns.", exc_info=True)
+        logging.error("Failed to move run manifest to ngi-nas-ns.", exc_info=True)
     else:
-        logging.info("Samplesheet moved to ngi-nas-ns.")
+        logging.info("Run manifest moved to ngi-nas-ns.")
 
 
 if __name__ == "__main__":

From eafe4db0408dfe505e757f79c45b20ec61867e7e Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Fri, 23 Aug 2024 10:08:17 +0200
Subject: [PATCH 38/50] move blocks

---
 scripts/generate_aviti_run_manifest.py | 89 +++++++++++++-------------
 1 file changed, 46 insertions(+), 43 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 99c54278..541f2b69 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -21,6 +21,38 @@
 LABEL_SEQ_SUBSTRING = re.compile(r"[ACGT]{4,}(-[ACGT]{4,})?")
 
 
+def get_runValues_section(process: Process, file_name: str) -> str:
+    """Generate the [RUNVALUES] section of the AVITI run manifest and return it as a string."""
+
+    # TODO master step fields for read recipe?
+
+    runValues_section = "\n".join(
+        [
+            "[RUNVALUES]",
+            "KeyName, Value",
+            f"lims_step_name, {safe_string(process.type.name)}",
+            f"lims_step_id, {process.id}",
+            f"lims_step_operator, {process.technician.name}",
+            f"file_name, {safe_string(file_name)}",
+            f"file_timestamp, {TIMESTAMP}",
+        ]
+    )
+
+    return runValues_section
+
+
+def get_settings_section() -> str:
+    """Generate the [SETTINGS] section of the AVITI run manifest and return it as a string."""
+    settings_section = "\n".join(
+        [
+            "[SETTINGS]",
+            "SettingName, Value",
+        ]
+    )
+
+    return settings_section
+
+
 def get_samples_section(process: Process) -> str:
     """Generate the [SAMPLES] section of the AVITI run manifest and return it as a string."""
 
@@ -82,6 +114,7 @@ def get_samples_section(process: Process) -> str:
             lane_rows.append(row)
 
         # Add PhiX controls
+        # TODO read from master step field
         for phix_idx_pair in [
             ("ACGTGTAGC", "GCTAGTGCA"),
             ("CACATGCTG", "AGACACTGT"),
@@ -107,9 +140,14 @@ def get_samples_section(process: Process) -> str:
     return samples_section
 
 
-def revcomp(seq: str) -> str:
-    """Reverse-complement a DNA string."""
-    return seq.translate(str.maketrans("ACGT", "TGCA"))[::-1]
+def check_distances(rows: list[dict], dist_warning_threshold=3) -> None:
+    for i in range(len(rows)):
+        row = rows[i]
+
+        for row_comp in rows[i + 1 :]:
+            check_pair_distance(
+                row, row_comp, dist_warning_threshold=dist_warning_threshold
+            )
 
 
 def check_pair_distance(
@@ -182,6 +220,11 @@ def check_pair_distance(
             raise AssertionError("Identical indices detected.")
 
 
+def revcomp(seq: str) -> str:
+    """Reverse-complement a DNA string."""
+    return seq.translate(str.maketrans("ACGT", "TGCA"))[::-1]
+
+
 def show_match(seq1: str, seq2: str) -> str:
     """Visualize base-by-base match between sequences of equal length."""
 
@@ -198,16 +241,6 @@ def show_match(seq1: str, seq2: str) -> str:
     return lines
 
 
-def check_distances(rows: list[dict], dist_warning_threshold=3) -> None:
-    for i in range(len(rows)):
-        row = rows[i]
-
-        for row_comp in rows[i + 1 :]:
-            check_pair_distance(
-                row, row_comp, dist_warning_threshold=dist_warning_threshold
-            )
-
-
 def safe_string(s: str) -> str:
     """Wrap a string in quotes if it contains commas."""
     if "," in s:
@@ -216,36 +249,6 @@ def safe_string(s: str) -> str:
         return s
 
 
-def get_runValues_section(process: Process, file_name: str) -> str:
-    """Generate the [RUNVALUES] section of the AVITI run manifest and return it as a string."""
-
-    runValues_section = "\n".join(
-        [
-            "[RUNVALUES]",
-            "KeyName, Value",
-            f"lims_step_name, {safe_string(process.type.name)}",
-            f"lims_step_id, {process.id}",
-            f"lims_step_operator, {process.technician.name}",
-            f"file_name, {safe_string(file_name)}",
-            f"file_timestamp, {TIMESTAMP}",
-        ]
-    )
-
-    return runValues_section
-
-
-def get_settings_section() -> str:
-    """Generate the [SETTINGS] section of the AVITI run manifest and return it as a string."""
-    settings_section = "\n".join(
-        [
-            "[SETTINGS]",
-            "SettingName, Value",
-        ]
-    )
-
-    return settings_section
-
-
 @epp_decorator(script_path=__file__, timestamp=TIMESTAMP)
 def main(args: Namespace):
     lims = Lims(BASEURI, USERNAME, PASSWORD)

From 6742781929f7711c50a4ae844849ad5a17f37c15 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Fri, 23 Aug 2024 10:11:22 +0200
Subject: [PATCH 39/50] remove kw arg

---
 scripts/generate_aviti_run_manifest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 541f2b69..3d0cd088 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -269,7 +269,7 @@ def main(args: Namespace):
 
     # Write manifest
     with open(file_name, "w") as f:
-        f.write(manifest, encoding="utf-8")
+        f.write(manifest)
 
     # Upload manifest
     logging.info("Uploading run manifest to LIMS...")

From c6d47f3f8af58fe79ee0d00f2014e024173c7e75 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Fri, 23 Aug 2024 10:16:29 +0200
Subject: [PATCH 40/50] get rid of space

---
 scripts/generate_aviti_run_manifest.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 3d0cd088..1d8adc23 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -254,9 +254,7 @@ def main(args: Namespace):
     lims = Lims(BASEURI, USERNAME, PASSWORD)
     process = Process(lims, id=args.pid)
 
-    file_name = (
-        f"AVITI_run_manifest_{process.id}_{TIMESTAMP}_{process.technician.name}.csv"
-    )
+    file_name = f"AVITI_run_manifest_{process.id}_{TIMESTAMP}_{process.technician.name.replace(' ','')}.csv"
 
     # Build manifest
     logging.info("Starting to build run manifest.")

From f044e7fa40afaa03ec3252c3e3555a99e1aba369 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Fri, 23 Aug 2024 10:18:02 +0200
Subject: [PATCH 41/50] remove superfluous log

---
 scripts/generate_aviti_run_manifest.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 1d8adc23..2ca70f7f 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -67,8 +67,6 @@ def get_samples_section(process: Process) -> str:
     # Iterate over pools
     all_rows = []
     for art_out in arts_out:
-        logging.info(f"Iterating over pool '{art_out.id}'...")
-
         lane_rows = []
         assert (
             art_out.container.type.name == "AVITI Flow Cell"

From ee1ad8c2dffc266755cb2505eb70cd94ea58eb56 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Fri, 23 Aug 2024 10:21:36 +0200
Subject: [PATCH 42/50] make assertion more lenient

---
 scripts/generate_aviti_run_manifest.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 2ca70f7f..ca287787 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -69,8 +69,8 @@ def get_samples_section(process: Process) -> str:
     for art_out in arts_out:
         lane_rows = []
         assert (
-            art_out.container.type.name == "AVITI Flow Cell"
-        ), "Unsupported container type."
+            "AVITI Flow Cell" in art_out.container.type.name
+        ), f"Unsupported container type {art_out.container.type.name}."
         assert (
             len(art_out.samples) > 1 and len(art_out.reagent_labels) > 1
         ), "Not a pool."

From 3a53112faed1711cac354a40656e40d2b71c59f2 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Fri, 23 Aug 2024 11:00:55 +0200
Subject: [PATCH 43/50] make adding phix to manifest conditional on udf, dump
 run recipe in samplesheet

---
 scripts/generate_aviti_run_manifest.py | 40 ++++++++++++++++----------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index ca287787..1b18e51c 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -24,7 +24,14 @@
 def get_runValues_section(process: Process, file_name: str) -> str:
     """Generate the [RUNVALUES] section of the AVITI run manifest and return it as a string."""
 
-    # TODO master step fields for read recipe?
+    read_recipe = "-".join(
+        [
+            str(process.udf.get("Read 1 Cycles", 0)),
+            str(process.udf.get("Index Read 1", 0)),
+            str(process.udf.get("Index Read 2", 0)),
+            str(process.udf.get("Read 2 Cycles", 0)),
+        ]
+    )
 
     runValues_section = "\n".join(
         [
@@ -35,6 +42,7 @@ def get_runValues_section(process: Process, file_name: str) -> str:
             f"lims_step_operator, {process.technician.name}",
             f"file_name, {safe_string(file_name)}",
             f"file_timestamp, {TIMESTAMP}",
+            f"read_recipe, {read_recipe}",
         ]
     )
 
@@ -56,6 +64,8 @@ def get_settings_section() -> str:
 def get_samples_section(process: Process) -> str:
     """Generate the [SAMPLES] section of the AVITI run manifest and return it as a string."""
 
+    phix_loaded: bool = process.udf["PhiX Loaded"]
+
     # Get the analytes placed into the flowcell
     arts_out = [op for op in process.all_outputs() if op.type == "Analyte"]
 
@@ -112,20 +122,20 @@ def get_samples_section(process: Process) -> str:
             lane_rows.append(row)
 
         # Add PhiX controls
-        # TODO read from master step field
-        for phix_idx_pair in [
-            ("ACGTGTAGC", "GCTAGTGCA"),
-            ("CACATGCTG", "AGACACTGT"),
-            ("GTACACGAT", "CTCGTACAG"),
-            ("TGTGCATCA", "TAGTCGATC"),
-        ]:
-            row = {}
-            row["SampleName"] = "PhiX"
-            row["Index1"] = phix_idx_pair[0]
-            row["Index2"] = phix_idx_pair[1]
-            if ungrouped_lanes:
-                row["Lane"] = lane
-            lane_rows.append(row)
+        if phix_loaded:
+            for phix_idx_pair in [
+                ("ACGTGTAGC", "GCTAGTGCA"),
+                ("CACATGCTG", "AGACACTGT"),
+                ("GTACACGAT", "CTCGTACAG"),
+                ("TGTGCATCA", "TAGTCGATC"),
+            ]:
+                row = {}
+                row["SampleName"] = "PhiX"
+                row["Index1"] = phix_idx_pair[0]
+                row["Index2"] = phix_idx_pair[1]
+                if ungrouped_lanes:
+                    row["Lane"] = lane
+                lane_rows.append(row)
 
         # Check for index collision within lane, across samples and PhiX
         check_distances(lane_rows)

From 8f82db1175451759fb3352d36bb8eec1ae7b05d1 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Fri, 23 Aug 2024 11:04:40 +0200
Subject: [PATCH 44/50] correct path

---
 scripts/generate_aviti_run_manifest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 1b18e51c..b7c7e455 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -290,7 +290,7 @@ def main(args: Namespace):
     try:
         shutil.copyfile(
             file_name,
-            f"/srv/ngi-nas-ns/samplesheets/AVITI/{dt.now().year}/{file_name}",
+            f"/srv/ngi-nas-ns/samplesheets/Aviti/{dt.now().year}/{file_name}",
         )
         os.remove(file_name)
     except:

From 52664a9adbf38caea62a9221d5f47176d0807436 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 27 Aug 2024 11:04:17 +0200
Subject: [PATCH 45/50] always explicate lanes in [SAMPLES] section

---
 scripts/generate_aviti_run_manifest.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index b7c7e455..3d6dd2e2 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -68,15 +68,16 @@ def get_samples_section(process: Process) -> str:
 
     # Get the analytes placed into the flowcell
     arts_out = [op for op in process.all_outputs() if op.type == "Analyte"]
+    lanes = [art_out.location[1].split(":")[1] for art_out in arts_out]
 
-    # Check whether lanes are individually addressable
-    lanes_used = set([art_out.location[1].split(":")[1] for art_out in arts_out])
-    ungrouped_lanes = True if len(lanes_used) == 2 else False
-    logging.info(f"Individually addressable lanes: {ungrouped_lanes}")
+    # If only a single pool is added to the LIMS container, treat it as though it was loaded into both lanes
+    if len(lanes) == 1:
+        lanes.append("2" if lanes[0] == "1" else "1")
+        arts_out.append(arts_out[0])
 
     # Iterate over pools
     all_rows = []
-    for art_out in arts_out:
+    for art_out, lane in zip(arts_out, lanes):
         lane_rows = []
         assert (
             "AVITI Flow Cell" in art_out.container.type.name
@@ -88,7 +89,6 @@ def get_samples_section(process: Process) -> str:
             art_out.reagent_labels
         ), "Unequal number of samples and reagent labels."
 
-        lane: str = art_out.location[1].split(":")[1]
         sample2label: dict[str, str] = get_pool_sample_label_mapping(art_out)
         samples = art_out.samples
         labels = art_out.reagent_labels
@@ -116,8 +116,7 @@ def get_samples_section(process: Process) -> str:
             row["SampleName"] = sample.name
             row["Index1"] = index1
             row["Index2"] = index2
-            if ungrouped_lanes:
-                row["Lane"] = lane
+            row["Lane"] = lane
 
             lane_rows.append(row)
 
@@ -133,8 +132,7 @@ def get_samples_section(process: Process) -> str:
                 row["SampleName"] = "PhiX"
                 row["Index1"] = phix_idx_pair[0]
                 row["Index2"] = phix_idx_pair[1]
-                if ungrouped_lanes:
-                    row["Lane"] = lane
+                row["Lane"] = lane
                 lane_rows.append(row)
 
         # Check for index collision within lane, across samples and PhiX

From 5b0ca7b022e265f5aadf23f0c1321986658fd707 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 27 Aug 2024 11:09:23 +0200
Subject: [PATCH 46/50] sort samples section by name and lane

---
 scripts/generate_aviti_run_manifest.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 3d6dd2e2..16991ae5 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -140,6 +140,7 @@ def get_samples_section(process: Process) -> str:
         all_rows.extend(lane_rows)
 
     df = pd.DataFrame(all_rows)
+    df.sort_values(by=["SampleName", "Lane"], inplace=True)
 
     samples_section = f"[SAMPLES]\n{df.to_csv(index=None, header=True)}"
 

From 5ac1299c8d167ad4498be14fbc617477f9b48bc0 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 27 Aug 2024 11:12:36 +0200
Subject: [PATCH 47/50] Revert "sort samples section by name and lane"

This reverts commit 5b0ca7b022e265f5aadf23f0c1321986658fd707.
---
 scripts/generate_aviti_run_manifest.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 16991ae5..3d6dd2e2 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -140,7 +140,6 @@ def get_samples_section(process: Process) -> str:
         all_rows.extend(lane_rows)
 
     df = pd.DataFrame(all_rows)
-    df.sort_values(by=["SampleName", "Lane"], inplace=True)
 
     samples_section = f"[SAMPLES]\n{df.to_csv(index=None, header=True)}"
 

From 0ecb673aded3c708693a604121cbef902d9498bc Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 27 Aug 2024 13:35:43 +0200
Subject: [PATCH 48/50] adapt to 2-lane container

---
 scripts/generate_aviti_run_manifest.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 3d6dd2e2..827f6f5d 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -66,14 +66,11 @@ def get_samples_section(process: Process) -> str:
 
     phix_loaded: bool = process.udf["PhiX Loaded"]
 
-    # Get the analytes placed into the flowcell
+    # Assert two output analytes placed in either flowcell lane
     arts_out = [op for op in process.all_outputs() if op.type == "Analyte"]
+    assert len(arts_out) == 2, "Expected two output analytes."
     lanes = [art_out.location[1].split(":")[1] for art_out in arts_out]
-
-    # If only a single pool is added to the LIMS container, treat it as though it was loaded into both lanes
-    if len(lanes) == 1:
-        lanes.append("2" if lanes[0] == "1" else "1")
-        arts_out.append(arts_out[0])
+    assert set(lanes) == {"1", "2"}, "Expected lanes 1 and 2."
 
     # Iterate over pools
     all_rows = []

From 0cbddfc60c54a6f85b6e5b2e6483a12c1394f275 Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 27 Aug 2024 13:45:37 +0200
Subject: [PATCH 49/50] Include flowcell ID, rename sanitization func and trim
 redundant metadata

---
 scripts/generate_aviti_run_manifest.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index 827f6f5d..eee58152 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -21,6 +21,16 @@
 LABEL_SEQ_SUBSTRING = re.compile(r"[ACGT]{4,}(-[ACGT]{4,})?")
 
 
+def get_flowcell_id(process: Process) -> str:
+    flowcell_ids = [
+        op.container.name for op in process.all_outputs() if op.type == "Analyte"
+    ]
+
+    assert len(set(flowcell_ids)) == 1, "Expected one flowcell ID."
+
+    return flowcell_ids[0]
+
+
 def get_runValues_section(process: Process, file_name: str) -> str:
     """Generate the [RUNVALUES] section of the AVITI run manifest and return it as a string."""
 
@@ -37,11 +47,8 @@ def get_runValues_section(process: Process, file_name: str) -> str:
         [
             "[RUNVALUES]",
             "KeyName, Value",
-            f"lims_step_name, {safe_string(process.type.name)}",
-            f"lims_step_id, {process.id}",
-            f"lims_step_operator, {process.technician.name}",
-            f"file_name, {safe_string(file_name)}",
-            f"file_timestamp, {TIMESTAMP}",
+            f"lims_step_name, {sanitize(process.type.name)}",
+            f"file_name, {sanitize(file_name)}",
             f"read_recipe, {read_recipe}",
         ]
     )
@@ -244,7 +251,7 @@ def show_match(seq1: str, seq2: str) -> str:
     return lines
 
 
-def safe_string(s: str) -> str:
+def sanitize(s: str) -> str:
     """Wrap a string in quotes if it contains commas."""
     if "," in s:
         return f'"{s}"'
@@ -257,7 +264,9 @@ def main(args: Namespace):
     lims = Lims(BASEURI, USERNAME, PASSWORD)
     process = Process(lims, id=args.pid)
 
-    file_name = f"AVITI_run_manifest_{process.id}_{TIMESTAMP}_{process.technician.name.replace(' ','')}.csv"
+    # Name manifest file
+    flowcell_id = get_flowcell_id(process)
+    file_name = f"AVITI_run_manifest_{flowcell_id}_{process.id}_{TIMESTAMP}_{process.technician.name.replace(' ','')}.csv"
 
     # Build manifest
     logging.info("Starting to build run manifest.")

From 20f6dbd1e02a5b823f0849e851d16c4904699f2a Mon Sep 17 00:00:00 2001
From: kedhammar <alfred.kedhammar@scilifelab.se>
Date: Tue, 27 Aug 2024 13:53:33 +0200
Subject: [PATCH 50/50] add warning

---
 scripts/generate_aviti_run_manifest.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py
index eee58152..2cae17f3 100644
--- a/scripts/generate_aviti_run_manifest.py
+++ b/scripts/generate_aviti_run_manifest.py
@@ -27,8 +27,14 @@ def get_flowcell_id(process: Process) -> str:
     ]
 
     assert len(set(flowcell_ids)) == 1, "Expected one flowcell ID."
+    flowcell_id = flowcell_ids[0]
 
-    return flowcell_ids[0]
+    if "-" in flowcell_id:
+        logging.warning(
+            f"Container name {flowcell_id} contains a dash, did you forget to set the name of the LIMS container to the flowcell ID?"
+        )
+
+    return flowcell_id
 
 
 def get_runValues_section(process: Process, file_name: str) -> str: