From a9edf30376e6cf1e471ead29899b23afc58c68a1 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 19 Aug 2024 10:46:56 +0200 Subject: [PATCH 01/50] turn the scripts dir into a module, to enable importing code from the repo rather than the installed package --- scripts/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 scripts/__init__.py diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 00000000..e69de29b From 6a998b10a8bbc52061a32d3004e840592e5e55d4 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 19 Aug 2024 10:47:06 +0200 Subject: [PATCH 02/50] use module import --- scripts/zika_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/zika_methods.py b/scripts/zika_methods.py index a813bd47..8a90548e 100644 --- a/scripts/zika_methods.py +++ b/scripts/zika_methods.py @@ -10,9 +10,9 @@ import numpy as np import pandas as pd -import zika_utils from epp_utils.udf_tools import is_filled +from scripts import zika_utils def pool_fixed_vol( From a55379b86b1f51ad5d0ec02bc1f92075a51d2d5e Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 19 Aug 2024 10:52:18 +0200 Subject: [PATCH 03/50] modularize and nest "calc_from_args_utils" and "epp_utils" --- .../calc_from_args_utils}/__init__.py | 0 .../calc_from_args_utils}/calculation_methods.py | 4 ++-- .../calc_from_args_utils}/udf_arg_methods.py | 2 +- {epp_utils => scilifelab_epps/epp_utils}/__init__.py | 0 {epp_utils => scilifelab_epps/epp_utils}/formula.py | 0 {epp_utils => scilifelab_epps/epp_utils}/udf_tools.py | 0 scripts/calc_from_args.py | 2 +- scripts/generate_minknow_samplesheet.py | 2 +- scripts/log_udfs.py | 2 +- scripts/molar_concentration.py | 2 +- scripts/ont_calc_volumes.py | 2 +- scripts/ont_pool.py | 2 +- scripts/ont_sync_to_db.py | 2 +- scripts/ont_update_amount.py | 2 +- scripts/parse_anglerfish_results.py | 2 +- scripts/parse_ba_results.py | 2 +- scripts/qc_amount_calculation.py | 2 +- scripts/zika_methods.py | 2 +- scripts/zika_utils.py | 2 +- 19 files changed, 16 insertions(+), 16 deletions(-) rename {calc_from_args_utils => scilifelab_epps/calc_from_args_utils}/__init__.py (100%) rename {calc_from_args_utils => scilifelab_epps/calc_from_args_utils}/calculation_methods.py (99%) rename {calc_from_args_utils => scilifelab_epps/calc_from_args_utils}/udf_arg_methods.py (98%) rename {epp_utils => scilifelab_epps/epp_utils}/__init__.py (100%) rename {epp_utils => scilifelab_epps/epp_utils}/formula.py (100%) rename {epp_utils => scilifelab_epps/epp_utils}/udf_tools.py (100%) diff --git a/calc_from_args_utils/__init__.py b/scilifelab_epps/calc_from_args_utils/__init__.py similarity index 100% rename from calc_from_args_utils/__init__.py rename to scilifelab_epps/calc_from_args_utils/__init__.py diff --git a/calc_from_args_utils/calculation_methods.py b/scilifelab_epps/calc_from_args_utils/calculation_methods.py similarity index 99% rename from calc_from_args_utils/calculation_methods.py rename to scilifelab_epps/calc_from_args_utils/calculation_methods.py index 5cb33f28..7e7abfc3 100644 --- a/calc_from_args_utils/calculation_methods.py +++ b/scilifelab_epps/calc_from_args_utils/calculation_methods.py @@ -7,12 +7,12 @@ import tabulate from genologics.entities import Process -from calc_from_args_utils.udf_arg_methods import ( +from scilifelab_epps.calc_from_args_utils.udf_arg_methods import ( fetch_from_arg, get_UDF_source, get_UDF_source_name, ) -from epp_utils import formula, udf_tools +from scilifelab_epps.epp_utils import formula, udf_tools DESC = """This file contains the method functions for a UDF-agnostic script.""" diff --git a/calc_from_args_utils/udf_arg_methods.py b/scilifelab_epps/calc_from_args_utils/udf_arg_methods.py similarity index 98% rename from calc_from_args_utils/udf_arg_methods.py rename to scilifelab_epps/calc_from_args_utils/udf_arg_methods.py index 3f5a56f4..a321025f 100644 --- a/calc_from_args_utils/udf_arg_methods.py +++ b/scilifelab_epps/calc_from_args_utils/udf_arg_methods.py @@ -5,7 +5,7 @@ import yaml from genologics.entities import Artifact, Process -from epp_utils import udf_tools +from scilifelab_epps.epp_utils import udf_tools def fetch_from_arg( diff --git a/epp_utils/__init__.py b/scilifelab_epps/epp_utils/__init__.py similarity index 100% rename from epp_utils/__init__.py rename to scilifelab_epps/epp_utils/__init__.py diff --git a/epp_utils/formula.py b/scilifelab_epps/epp_utils/formula.py similarity index 100% rename from epp_utils/formula.py rename to scilifelab_epps/epp_utils/formula.py diff --git a/epp_utils/udf_tools.py b/scilifelab_epps/epp_utils/udf_tools.py similarity index 100% rename from epp_utils/udf_tools.py rename to scilifelab_epps/epp_utils/udf_tools.py diff --git a/scripts/calc_from_args.py b/scripts/calc_from_args.py index b8c398d4..1f6e48b1 100644 --- a/scripts/calc_from_args.py +++ b/scripts/calc_from_args.py @@ -9,7 +9,7 @@ from genologics.entities import Process from genologics.lims import Lims -from calc_from_args_utils import calculation_methods +from scilifelab_epps.calc_from_args_utils import calculation_methods from scilifelab_epps.epp import upload_file DESC = """UDF-agnostic script to perform calculations across all artifacts of a step. diff --git a/scripts/generate_minknow_samplesheet.py b/scripts/generate_minknow_samplesheet.py index 25122426..88ce0410 100644 --- a/scripts/generate_minknow_samplesheet.py +++ b/scripts/generate_minknow_samplesheet.py @@ -17,7 +17,7 @@ from tabulate import tabulate from data.ONT_barcodes import ONT_BARCODE_LABEL_PATTERN, ONT_BARCODES -from epp_utils.udf_tools import fetch +from scilifelab_epps.epp_utils.udf_tools import fetch from scilifelab_epps.epp import traceback_to_step, upload_file DESC = """ Script to generate MinKNOW samplesheet for starting ONT runs. diff --git a/scripts/log_udfs.py b/scripts/log_udfs.py index 5d86de13..24fb52f6 100644 --- a/scripts/log_udfs.py +++ b/scripts/log_udfs.py @@ -12,7 +12,7 @@ from ont_send_reloading_info_to_db import parse_run from tabulate import tabulate -from epp_utils import udf_tools +from scilifelab_epps.epp_utils import udf_tools DESC = """Script for the EPP "Log fields" and file slot "Field log". diff --git a/scripts/molar_concentration.py b/scripts/molar_concentration.py index 40289d4a..1b0eee71 100644 --- a/scripts/molar_concentration.py +++ b/scripts/molar_concentration.py @@ -16,7 +16,7 @@ from genologics.entities import Process from genologics.lims import Lims -from epp_utils.formula import ng_ul_to_nM +from scilifelab_epps.epp_utils.formula import ng_ul_to_nM from scilifelab_epps.epp import EppLogger diff --git a/scripts/ont_calc_volumes.py b/scripts/ont_calc_volumes.py index 5aafca99..b9528a9f 100644 --- a/scripts/ont_calc_volumes.py +++ b/scripts/ont_calc_volumes.py @@ -8,7 +8,7 @@ from genologics.entities import Process from genologics.lims import Lims -from epp_utils import formula, udf_tools +from scilifelab_epps.epp_utils import formula, udf_tools DESC = """ EPP "ONT calculate volumes" diff --git a/scripts/ont_pool.py b/scripts/ont_pool.py index fd067f4d..9a8b7304 100644 --- a/scripts/ont_pool.py +++ b/scripts/ont_pool.py @@ -11,7 +11,7 @@ from tabulate import tabulate from zika_utils import fetch_sample_data -from epp_utils import formula +from scilifelab_epps.epp_utils import formula DESC = """ EPP "ONT pooling", file slot "ONT pooling log". diff --git a/scripts/ont_sync_to_db.py b/scripts/ont_sync_to_db.py index 2dc13bd2..95c0e64c 100644 --- a/scripts/ont_sync_to_db.py +++ b/scripts/ont_sync_to_db.py @@ -18,7 +18,7 @@ from genologics.lims import Lims from ont_send_reloading_info_to_db import get_ONT_db -from epp_utils import udf_tools +from scilifelab_epps.epp_utils import udf_tools from scilifelab_epps.epp import upload_file DESC = """Script for finishing the step to start ONT sequencing in LIMS. diff --git a/scripts/ont_update_amount.py b/scripts/ont_update_amount.py index 174e4f3c..b0b215ca 100644 --- a/scripts/ont_update_amount.py +++ b/scripts/ont_update_amount.py @@ -8,7 +8,7 @@ from genologics.entities import Process from genologics.lims import Lims -from epp_utils import formula, udf_tools +from scilifelab_epps.epp_utils import formula, udf_tools DESC = """ EPP "ONT Update Amounts". diff --git a/scripts/parse_anglerfish_results.py b/scripts/parse_anglerfish_results.py index 07a7d936..c9856801 100644 --- a/scripts/parse_anglerfish_results.py +++ b/scripts/parse_anglerfish_results.py @@ -11,7 +11,7 @@ from genologics.entities import Artifact, Process from genologics.lims import Lims -from epp_utils import udf_tools +from scilifelab_epps.epp_utils import udf_tools from scilifelab_epps.epp import upload_file TIMESTAMP: str = dt.now().strftime("%y%m%d_%H%M%S") diff --git a/scripts/parse_ba_results.py b/scripts/parse_ba_results.py index 9e7c7905..f19c9eac 100644 --- a/scripts/parse_ba_results.py +++ b/scripts/parse_ba_results.py @@ -11,7 +11,7 @@ from genologics.entities import Process from genologics.lims import Lims -from epp_utils import udf_tools +from scilifelab_epps.epp_utils import udf_tools from scilifelab_epps.epp import get_well_number DESC = """This script parses the Agilent BioAnalyzer XML report. diff --git a/scripts/qc_amount_calculation.py b/scripts/qc_amount_calculation.py index 503318a9..ab119967 100644 --- a/scripts/qc_amount_calculation.py +++ b/scripts/qc_amount_calculation.py @@ -16,7 +16,7 @@ from genologics.entities import Process from genologics.lims import Lims -from epp_utils import formula, udf_tools +from scilifelab_epps.epp_utils import formula, udf_tools from scilifelab_epps.epp import EppLogger diff --git a/scripts/zika_methods.py b/scripts/zika_methods.py index 8a90548e..27d294a5 100644 --- a/scripts/zika_methods.py +++ b/scripts/zika_methods.py @@ -11,7 +11,7 @@ import numpy as np import pandas as pd -from epp_utils.udf_tools import is_filled +from scilifelab_epps.epp_utils.udf_tools import is_filled from scripts import zika_utils diff --git a/scripts/zika_utils.py b/scripts/zika_utils.py index 941e8da7..4fdb57d0 100644 --- a/scripts/zika_utils.py +++ b/scripts/zika_utils.py @@ -17,7 +17,7 @@ import pandas as pd from genologics.entities import Process -from epp_utils.udf_tools import fetch_last +from scilifelab_epps.epp_utils.udf_tools import fetch_last def verify_step(currentStep, targets=None): From 1fd77134ec1551f5ec7d7c24098eec716822df40 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 19 Aug 2024 10:55:17 +0200 Subject: [PATCH 04/50] add autobuild folder to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 113985a6..23506842 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ build/ *.swp __pycache__ node_modules +dist From 0b982c1656bd169ca88a4552ad8ef84e3fefa86d Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 19 Aug 2024 15:47:07 +0200 Subject: [PATCH 05/50] try constructing script wrapper --- scilifelab_epps/wrapper.py | 101 ++++++++++++++++++ scripts/generate_minknow_samplesheet.py | 133 +++++++----------------- 2 files changed, 138 insertions(+), 96 deletions(-) create mode 100644 scilifelab_epps/wrapper.py diff --git a/scilifelab_epps/wrapper.py b/scilifelab_epps/wrapper.py new file mode 100644 index 00000000..2542d510 --- /dev/null +++ b/scilifelab_epps/wrapper.py @@ -0,0 +1,101 @@ +import logging +import os +import sys +from datetime import datetime as dt + +from genologics.config import BASEURI, PASSWORD, USERNAME +from genologics.entities import Process +from genologics.lims import Lims + +from scilifelab_epps.epp import upload_file + + +def epp_decorator(file: str): + """Decorator for passing file info.""" + SCRIPT_NAME: str = os.path.basename(__file__).split(".")[0] + + def _epp_decorator(script_main): + """Decorator for wrapping EPP scripts.""" + + def epp_wrapper(*args, **kwargs): + """General wrapper for EPP scripts.""" + + TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S") + + # Set up LIMS + lims = Lims(BASEURI, USERNAME, PASSWORD) + lims.check_version() + process = Process(lims, id=args.pid) + + # Name log file + log_filename: str = ( + "_".join( + [ + SCRIPT_NAME, + process.id, + TIMESTAMP, + process.technician.name.replace(" ", ""), + ] + ) + + ".log" + ) + + # Set up logging + logging.basicConfig( + filename=log_filename, + filemode="w", + format="%(levelname)s: %(message)s", + level=logging.INFO, + ) + + # Start logging + logging.info(f"Script '{SCRIPT_NAME}' started at {TIMESTAMP}.") + logging.info( + f"Launched in step '{process.type.name}' ({process.id}) by {process.technician.name}." + ) + args_str = "\n\t".join( + [f"'{arg}': {getattr(args, arg)}" for arg in vars(args)] + ) + logging.info(f"Script called with arguments: \n\t{args_str}") + + # Run + try: + script_main(process, lims, *args, **kwargs) + + except Exception as e: + # Post error to LIMS GUI + logging.error(str(e), exc_info=True) + logging.shutdown() + upload_file( + file_path=log_filename, + file_slot=args.log, + process=process, + lims=lims, + ) + os.remove(log_filename) + sys.stderr.write(str(e)) + sys.exit(2) + else: + logging.info("") + logging.info("Script completed successfully.") + logging.shutdown() + upload_file( + file_path=log_filename, + file_slot=args.log, + process=process, + lims=lims, + ) + # Check log for errors and warnings + log_content = open(log_filename).read() + os.remove(log_filename) + if "ERROR:" in log_content or "WARNING:" in log_content: + sys.stderr.write( + "Script finished successfully, but log contains errors or warnings, please have a look." + ) + sys.exit(2) + else: + sys.exit(0) + + return epp_wrapper + + return _epp_decorator diff --git a/scripts/generate_minknow_samplesheet.py b/scripts/generate_minknow_samplesheet.py index 88ce0410..f9289368 100644 --- a/scripts/generate_minknow_samplesheet.py +++ b/scripts/generate_minknow_samplesheet.py @@ -4,7 +4,6 @@ import os import re import shutil -import sys from argparse import ArgumentParser from datetime import datetime as dt @@ -17,8 +16,9 @@ from tabulate import tabulate from data.ONT_barcodes import ONT_BARCODE_LABEL_PATTERN, ONT_BARCODES -from scilifelab_epps.epp_utils.udf_tools import fetch from scilifelab_epps.epp import traceback_to_step, upload_file +from scilifelab_epps.epp_utils.udf_tools import fetch +from scilifelab_epps.wrapper import epp_decorator DESC = """ Script to generate MinKNOW samplesheet for starting ONT runs. """ @@ -298,7 +298,7 @@ def write_minknow_csv(df: pd.DataFrame, file_path: str): df_csv.to_csv(file_path, index=False) -def generate_MinKNOW_samplesheet(process: Process): +def generate_MinKNOW_samplesheet(args): """=== Sample sheet columns === flow_cell_id E.g. 'PAM96489' @@ -322,6 +322,10 @@ def generate_MinKNOW_samplesheet(process: Process): - barcode """ + + lims = Lims(BASEURI, USERNAME, PASSWORD) + process = Process(lims, id=args.pid) + qc = True if "QC" in process.type.name else False logging.info(f"QC run: {qc}") @@ -470,7 +474,35 @@ def generate_MinKNOW_samplesheet(process: Process): return file_name -def main(): +@epp_decorator(__file__) +def main(args): + lims = Lims(BASEURI, USERNAME, PASSWORD) + process = Process(lims, id=args.pid) + + file_name = generate_MinKNOW_samplesheet(args) + + logging.info("Uploading samplesheet to LIMS...") + upload_file( + file_name, + args.file, + process, + lims, + ) + + logging.info("Moving samplesheet to ngi-nas-ns...") + try: + shutil.copyfile( + file_name, + f"/srv/ngi-nas-ns/samplesheets/nanopore/{dt.now().year}/{file_name}", + ) + os.remove(file_name) + except: + logging.error("Failed to move samplesheet to ngi-nas-ns.", exc_info=True) + else: + logging.info("Samplesheet moved to ngi-nas-ns.") + + +if __name__ == "__main__": # Parse args parser = ArgumentParser(description=DESC) parser.add_argument( @@ -493,95 +525,4 @@ def main(): ) args = parser.parse_args() - # Set up LIMS - lims = Lims(BASEURI, USERNAME, PASSWORD) - lims.check_version() - process = Process(lims, id=args.pid) - - # Set up logging - log_filename: str = ( - "_".join( - [ - SCRIPT_NAME, - process.id, - TIMESTAMP, - process.technician.name.replace(" ", ""), - ] - ) - + ".log" - ) - - logging.basicConfig( - filename=log_filename, - filemode="w", - format="%(levelname)s: %(message)s", - level=logging.INFO, - ) - - # Start logging - logging.info(f"Script '{SCRIPT_NAME}' started at {TIMESTAMP}.") - logging.info( - f"Launched in step '{process.type.name}' ({process.id}) by {process.technician.name}." - ) - args_str = "\n\t".join([f"'{arg}': {getattr(args, arg)}" for arg in vars(args)]) - logging.info(f"Script called with arguments: \n\t{args_str}") - - try: - file_name = generate_MinKNOW_samplesheet(process=process) - logging.info("Uploading samplesheet to LIMS...") - upload_file( - file_name, - args.file, - process, - lims, - ) - - logging.info("Moving samplesheet to ngi-nas-ns...") - try: - shutil.copyfile( - file_name, - f"/srv/ngi-nas-ns/samplesheets/nanopore/{dt.now().year}/{file_name}", - ) - os.remove(file_name) - except: - logging.error("Failed to move samplesheet to ngi-nas-ns.", exc_info=True) - else: - logging.info("Samplesheet moved to ngi-nas-ns.") - - except Exception as e: - # Post error to LIMS GUI - logging.error(str(e), exc_info=True) - logging.shutdown() - upload_file( - file_path=log_filename, - file_slot=args.log, - process=process, - lims=lims, - ) - os.remove(log_filename) - sys.stderr.write(str(e)) - sys.exit(2) - else: - logging.info("") - logging.info("Script completed successfully.") - logging.shutdown() - upload_file( - file_path=log_filename, - file_slot=args.log, - process=process, - lims=lims, - ) - # Check log for errors and warnings - log_content = open(log_filename).read() - os.remove(log_filename) - if "ERROR:" in log_content or "WARNING:" in log_content: - sys.stderr.write( - "Script finished successfully, but log contains errors or warnings, please have a look." - ) - sys.exit(2) - else: - sys.exit(0) - - -if __name__ == "__main__": - main() + main(args) From 9f599f56ca24e73fd76fbd2be55d7bc8fa1db22e Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 11:50:27 +0200 Subject: [PATCH 06/50] exclude __init__.py in scripts/ from attempted entry point build --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 886deb80..9856d996 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,9 @@ url="https://github.com/scilifelab/scilifelab_epps", license="GPLv3", packages=find_packages(exclude=["ez_setup", "examples", "tests"]), - scripts=glob.glob("scripts/*.py"), + scripts=[ + file for file in glob.glob("scripts/*.py") if file != "scripts/__init__.py" + ], include_package_data=True, zip_safe=False, ) From 2e694eb9c659462cd83cf039cf512691d9a6b71a Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 11:50:38 +0200 Subject: [PATCH 07/50] wip --- scilifelab_epps/wrapper.py | 17 +++++++---------- scripts/generate_minknow_samplesheet.py | 13 +++++++------ 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/scilifelab_epps/wrapper.py b/scilifelab_epps/wrapper.py index 2542d510..48025426 100644 --- a/scilifelab_epps/wrapper.py +++ b/scilifelab_epps/wrapper.py @@ -11,16 +11,13 @@ def epp_decorator(file: str): - """Decorator for passing file info.""" - SCRIPT_NAME: str = os.path.basename(__file__).split(".")[0] + script_name: str = os.path.basename(file).split(".")[0] def _epp_decorator(script_main): - """Decorator for wrapping EPP scripts.""" - - def epp_wrapper(*args, **kwargs): + def epp_wrapper(args): """General wrapper for EPP scripts.""" - TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S") + timestamp = dt.now().strftime("%y%m%d_%H%M%S") # Set up LIMS lims = Lims(BASEURI, USERNAME, PASSWORD) @@ -31,9 +28,9 @@ def epp_wrapper(*args, **kwargs): log_filename: str = ( "_".join( [ - SCRIPT_NAME, + script_name, process.id, - TIMESTAMP, + timestamp, process.technician.name.replace(" ", ""), ] ) @@ -49,7 +46,7 @@ def epp_wrapper(*args, **kwargs): ) # Start logging - logging.info(f"Script '{SCRIPT_NAME}' started at {TIMESTAMP}.") + logging.info(f"Script '{script_name}' started at {timestamp}.") logging.info( f"Launched in step '{process.type.name}' ({process.id}) by {process.technician.name}." ) @@ -60,7 +57,7 @@ def epp_wrapper(*args, **kwargs): # Run try: - script_main(process, lims, *args, **kwargs) + script_main(args) except Exception as e: # Post error to LIMS GUI diff --git a/scripts/generate_minknow_samplesheet.py b/scripts/generate_minknow_samplesheet.py index f9289368..9176b731 100644 --- a/scripts/generate_minknow_samplesheet.py +++ b/scripts/generate_minknow_samplesheet.py @@ -23,12 +23,6 @@ DESC = """ Script to generate MinKNOW samplesheet for starting ONT runs. """ -TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S") -SCRIPT_NAME: str = os.path.basename(__file__).split(".")[0] - -with open("/opt/gls/clarity/users/glsai/config/genosqlrc.yaml") as f: - config = yaml.safe_load(f) - def get_ont_library_contents( ont_library: Artifact, @@ -194,6 +188,9 @@ def get_ont_library_contents( def get_pool_sample_label_mapping(pool: Artifact) -> dict[str, str]: + with open("/opt/gls/clarity/users/glsai/config/genosqlrc.yaml") as f: + config = yaml.safe_load(f) + # Setup DB connection connection = psycopg2.connect( user=config["username"], @@ -525,4 +522,8 @@ def main(args): ) args = parser.parse_args() + import ipdb + + ipdb.set_trace() + main(args) From 8d4b54b153072ff2353d9ccbc49619b01e70c9a5 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 11:56:43 +0200 Subject: [PATCH 08/50] cleanup --- scilifelab_epps/wrapper.py | 4 +++- scripts/generate_minknow_samplesheet.py | 4 ---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/scilifelab_epps/wrapper.py b/scilifelab_epps/wrapper.py index 48025426..7a3c339a 100644 --- a/scilifelab_epps/wrapper.py +++ b/scilifelab_epps/wrapper.py @@ -59,6 +59,7 @@ def epp_wrapper(args): try: script_main(args) + # On script error except Exception as e: # Post error to LIMS GUI logging.error(str(e), exc_info=True) @@ -72,8 +73,9 @@ def epp_wrapper(args): os.remove(log_filename) sys.stderr.write(str(e)) sys.exit(2) + + # On script success else: - logging.info("") logging.info("Script completed successfully.") logging.shutdown() upload_file( diff --git a/scripts/generate_minknow_samplesheet.py b/scripts/generate_minknow_samplesheet.py index 9176b731..4ce599cc 100644 --- a/scripts/generate_minknow_samplesheet.py +++ b/scripts/generate_minknow_samplesheet.py @@ -522,8 +522,4 @@ def main(args): ) args = parser.parse_args() - import ipdb - - ipdb.set_trace() - main(args) From f2441a5bfbd37cccc2b13e6f58245dfb5e4f1ada Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 12:16:26 +0200 Subject: [PATCH 09/50] ready for testing --- scilifelab_epps/wrapper.py | 12 +++++++----- scripts/generate_minknow_samplesheet.py | 4 +++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/scilifelab_epps/wrapper.py b/scilifelab_epps/wrapper.py index 7a3c339a..0d10ee4d 100644 --- a/scilifelab_epps/wrapper.py +++ b/scilifelab_epps/wrapper.py @@ -1,7 +1,6 @@ import logging import os import sys -from datetime import datetime as dt from genologics.config import BASEURI, PASSWORD, USERNAME from genologics.entities import Process @@ -10,15 +9,18 @@ from scilifelab_epps.epp import upload_file -def epp_decorator(file: str): - script_name: str = os.path.basename(file).split(".")[0] +def epp_decorator(script_path: str, timestamp: str): + """This top-level decorator is meant to be used on EPP scripts' main functions. + + It receives the script path (__file__) and timestamp (yymmdd_hhmmss) as arguments to + pass on to it's children which wrap the main function to handle logging and graceful failure. + """ + script_name: str = os.path.basename(script_path).split(".")[0] def _epp_decorator(script_main): def epp_wrapper(args): """General wrapper for EPP scripts.""" - timestamp = dt.now().strftime("%y%m%d_%H%M%S") - # Set up LIMS lims = Lims(BASEURI, USERNAME, PASSWORD) lims.check_version() diff --git a/scripts/generate_minknow_samplesheet.py b/scripts/generate_minknow_samplesheet.py index 4ce599cc..de2e0828 100644 --- a/scripts/generate_minknow_samplesheet.py +++ b/scripts/generate_minknow_samplesheet.py @@ -23,6 +23,8 @@ DESC = """ Script to generate MinKNOW samplesheet for starting ONT runs. """ +TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S") + def get_ont_library_contents( ont_library: Artifact, @@ -471,7 +473,7 @@ def generate_MinKNOW_samplesheet(args): return file_name -@epp_decorator(__file__) +@epp_decorator(script_path=__file__, timestamp=TIMESTAMP) def main(args): lims = Lims(BASEURI, USERNAME, PASSWORD) process = Process(lims, id=args.pid) From 42137a07ba986131e1f0ba3c7812937eca9fedfc Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 12:21:35 +0200 Subject: [PATCH 10/50] add header --- scilifelab_epps/wrapper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scilifelab_epps/wrapper.py b/scilifelab_epps/wrapper.py index 0d10ee4d..5380fbee 100644 --- a/scilifelab_epps/wrapper.py +++ b/scilifelab_epps/wrapper.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python + import logging import os import sys From 71ad55709fd1f7a207769b69b8ea64ba5ced3d3b Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 12:34:13 +0200 Subject: [PATCH 11/50] renaming and reorganizing --- .../__init__.py | 0 .../calculation_methods.py | 4 ++-- .../udf_arg_methods.py | 2 +- scilifelab_epps/{epp_utils => utils}/__init__.py | 0 scilifelab_epps/{epp_utils => utils}/formula.py | 0 .../{epp_utils => utils}/udf_tools.py | 0 scilifelab_epps/zika/__init__.py | 0 .../zika/methods.py | 2 +- .../zika/utils.py | 2 +- scripts/bravo_csv.py | 16 +++++++--------- scripts/calc_from_args.py | 2 +- scripts/generate_minknow_samplesheet.py | 2 +- scripts/log_udfs.py | 2 +- scripts/molar_concentration.py | 2 +- scripts/ont_calc_volumes.py | 2 +- scripts/ont_pool.py | 2 +- scripts/ont_sync_to_db.py | 2 +- scripts/ont_update_amount.py | 2 +- scripts/parse_anglerfish_results.py | 2 +- scripts/parse_ba_results.py | 2 +- scripts/qc_amount_calculation.py | 2 +- 21 files changed, 23 insertions(+), 25 deletions(-) rename scilifelab_epps/{calc_from_args_utils => calc_from_args}/__init__.py (100%) rename scilifelab_epps/{calc_from_args_utils => calc_from_args}/calculation_methods.py (99%) rename scilifelab_epps/{calc_from_args_utils => calc_from_args}/udf_arg_methods.py (98%) rename scilifelab_epps/{epp_utils => utils}/__init__.py (100%) rename scilifelab_epps/{epp_utils => utils}/formula.py (100%) rename scilifelab_epps/{epp_utils => utils}/udf_tools.py (100%) create mode 100644 scilifelab_epps/zika/__init__.py rename scripts/zika_methods.py => scilifelab_epps/zika/methods.py (99%) rename scripts/zika_utils.py => scilifelab_epps/zika/utils.py (99%) diff --git a/scilifelab_epps/calc_from_args_utils/__init__.py b/scilifelab_epps/calc_from_args/__init__.py similarity index 100% rename from scilifelab_epps/calc_from_args_utils/__init__.py rename to scilifelab_epps/calc_from_args/__init__.py diff --git a/scilifelab_epps/calc_from_args_utils/calculation_methods.py b/scilifelab_epps/calc_from_args/calculation_methods.py similarity index 99% rename from scilifelab_epps/calc_from_args_utils/calculation_methods.py rename to scilifelab_epps/calc_from_args/calculation_methods.py index 7e7abfc3..f048a0ea 100644 --- a/scilifelab_epps/calc_from_args_utils/calculation_methods.py +++ b/scilifelab_epps/calc_from_args/calculation_methods.py @@ -7,12 +7,12 @@ import tabulate from genologics.entities import Process -from scilifelab_epps.calc_from_args_utils.udf_arg_methods import ( +from scilifelab_epps.calc_from_args.udf_arg_methods import ( fetch_from_arg, get_UDF_source, get_UDF_source_name, ) -from scilifelab_epps.epp_utils import formula, udf_tools +from scilifelab_epps.utils import formula, udf_tools DESC = """This file contains the method functions for a UDF-agnostic script.""" diff --git a/scilifelab_epps/calc_from_args_utils/udf_arg_methods.py b/scilifelab_epps/calc_from_args/udf_arg_methods.py similarity index 98% rename from scilifelab_epps/calc_from_args_utils/udf_arg_methods.py rename to scilifelab_epps/calc_from_args/udf_arg_methods.py index a321025f..1040c91d 100644 --- a/scilifelab_epps/calc_from_args_utils/udf_arg_methods.py +++ b/scilifelab_epps/calc_from_args/udf_arg_methods.py @@ -5,7 +5,7 @@ import yaml from genologics.entities import Artifact, Process -from scilifelab_epps.epp_utils import udf_tools +from scilifelab_epps.utils import udf_tools def fetch_from_arg( diff --git a/scilifelab_epps/epp_utils/__init__.py b/scilifelab_epps/utils/__init__.py similarity index 100% rename from scilifelab_epps/epp_utils/__init__.py rename to scilifelab_epps/utils/__init__.py diff --git a/scilifelab_epps/epp_utils/formula.py b/scilifelab_epps/utils/formula.py similarity index 100% rename from scilifelab_epps/epp_utils/formula.py rename to scilifelab_epps/utils/formula.py diff --git a/scilifelab_epps/epp_utils/udf_tools.py b/scilifelab_epps/utils/udf_tools.py similarity index 100% rename from scilifelab_epps/epp_utils/udf_tools.py rename to scilifelab_epps/utils/udf_tools.py diff --git a/scilifelab_epps/zika/__init__.py b/scilifelab_epps/zika/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/scripts/zika_methods.py b/scilifelab_epps/zika/methods.py similarity index 99% rename from scripts/zika_methods.py rename to scilifelab_epps/zika/methods.py index 27d294a5..77c24e23 100644 --- a/scripts/zika_methods.py +++ b/scilifelab_epps/zika/methods.py @@ -11,7 +11,7 @@ import numpy as np import pandas as pd -from scilifelab_epps.epp_utils.udf_tools import is_filled +from scilifelab_epps.utils.udf_tools import is_filled from scripts import zika_utils diff --git a/scripts/zika_utils.py b/scilifelab_epps/zika/utils.py similarity index 99% rename from scripts/zika_utils.py rename to scilifelab_epps/zika/utils.py index 4fdb57d0..26b0b528 100644 --- a/scripts/zika_utils.py +++ b/scilifelab_epps/zika/utils.py @@ -17,7 +17,7 @@ import pandas as pd from genologics.entities import Process -from scilifelab_epps.epp_utils.udf_tools import fetch_last +from scilifelab_epps.utils.udf_tools import fetch_last def verify_step(currentStep, targets=None): diff --git a/scripts/bravo_csv.py b/scripts/bravo_csv.py index 95f3a038..29096fe3 100644 --- a/scripts/bravo_csv.py +++ b/scripts/bravo_csv.py @@ -1,6 +1,5 @@ #!/usr/bin/env python - import logging import os import re @@ -8,12 +7,11 @@ from argparse import ArgumentParser import pandas as pd -import zika_methods -import zika_utils from genologics.config import BASEURI, PASSWORD, USERNAME from genologics.entities import Process from genologics.lims import Lims +from scilifelab_epps import zika from scilifelab_epps.epp import attach_file DESC = """EPP used to create csv files for the bravo robot""" @@ -263,12 +261,12 @@ def prepooling(currentStep, lims): if currentStep.instrument.name == "Zika": if currentStep.type.name == "Illumina DNA No-QC Library Pooling": - zika_methods.pool_fixed_vol( + zika.methods.pool_fixed_vol( currentStep=currentStep, lims=lims, ) else: - zika_methods.pool( + zika.methods.pool( currentStep=currentStep, lims=lims, udfs={ @@ -369,14 +367,14 @@ def setup_qpcr(currentStep, lims): def default_bravo(lims, currentStep, with_total_vol=True): # Re-route to Zika - if zika_utils.verify_step( + if zika.utils.verify_step( currentStep, targets=[ ("SMARTer Pico RNA", "Setup Workset/Plate"), ("QIAseq miRNA", "Setup Workset/Plate"), ], ): - zika_methods.norm( + zika.methods.norm( currentStep=currentStep, lims=lims, udfs={ @@ -388,10 +386,10 @@ def default_bravo(lims, currentStep, with_total_vol=True): "final_conc": None, }, ) - elif zika_utils.verify_step( + elif zika.utils.verify_step( currentStep, targets=[("Amplicon", "Setup Workset/Plate")] ): - zika_methods.norm( + zika.methods.norm( currentStep=currentStep, lims=lims, # Use lower minimum pipetting volume and customer metrics diff --git a/scripts/calc_from_args.py b/scripts/calc_from_args.py index 1f6e48b1..d5f73932 100644 --- a/scripts/calc_from_args.py +++ b/scripts/calc_from_args.py @@ -9,7 +9,7 @@ from genologics.entities import Process from genologics.lims import Lims -from scilifelab_epps.calc_from_args_utils import calculation_methods +from scilifelab_epps.calc_from_args import calculation_methods from scilifelab_epps.epp import upload_file DESC = """UDF-agnostic script to perform calculations across all artifacts of a step. diff --git a/scripts/generate_minknow_samplesheet.py b/scripts/generate_minknow_samplesheet.py index de2e0828..1a957632 100644 --- a/scripts/generate_minknow_samplesheet.py +++ b/scripts/generate_minknow_samplesheet.py @@ -17,7 +17,7 @@ from data.ONT_barcodes import ONT_BARCODE_LABEL_PATTERN, ONT_BARCODES from scilifelab_epps.epp import traceback_to_step, upload_file -from scilifelab_epps.epp_utils.udf_tools import fetch +from scilifelab_epps.utils.udf_tools import fetch from scilifelab_epps.wrapper import epp_decorator DESC = """ Script to generate MinKNOW samplesheet for starting ONT runs. diff --git a/scripts/log_udfs.py b/scripts/log_udfs.py index 24fb52f6..e42d6af0 100644 --- a/scripts/log_udfs.py +++ b/scripts/log_udfs.py @@ -12,7 +12,7 @@ from ont_send_reloading_info_to_db import parse_run from tabulate import tabulate -from scilifelab_epps.epp_utils import udf_tools +from scilifelab_epps.utils import udf_tools DESC = """Script for the EPP "Log fields" and file slot "Field log". diff --git a/scripts/molar_concentration.py b/scripts/molar_concentration.py index 1b0eee71..e7603da0 100644 --- a/scripts/molar_concentration.py +++ b/scripts/molar_concentration.py @@ -16,7 +16,7 @@ from genologics.entities import Process from genologics.lims import Lims -from scilifelab_epps.epp_utils.formula import ng_ul_to_nM +from scilifelab_epps.utils.formula import ng_ul_to_nM from scilifelab_epps.epp import EppLogger diff --git a/scripts/ont_calc_volumes.py b/scripts/ont_calc_volumes.py index b9528a9f..9c7520ae 100644 --- a/scripts/ont_calc_volumes.py +++ b/scripts/ont_calc_volumes.py @@ -8,7 +8,7 @@ from genologics.entities import Process from genologics.lims import Lims -from scilifelab_epps.epp_utils import formula, udf_tools +from scilifelab_epps.utils import formula, udf_tools DESC = """ EPP "ONT calculate volumes" diff --git a/scripts/ont_pool.py b/scripts/ont_pool.py index 9a8b7304..076dc8e4 100644 --- a/scripts/ont_pool.py +++ b/scripts/ont_pool.py @@ -11,7 +11,7 @@ from tabulate import tabulate from zika_utils import fetch_sample_data -from scilifelab_epps.epp_utils import formula +from scilifelab_epps.utils import formula DESC = """ EPP "ONT pooling", file slot "ONT pooling log". diff --git a/scripts/ont_sync_to_db.py b/scripts/ont_sync_to_db.py index 95c0e64c..29367148 100644 --- a/scripts/ont_sync_to_db.py +++ b/scripts/ont_sync_to_db.py @@ -18,7 +18,7 @@ from genologics.lims import Lims from ont_send_reloading_info_to_db import get_ONT_db -from scilifelab_epps.epp_utils import udf_tools +from scilifelab_epps.utils import udf_tools from scilifelab_epps.epp import upload_file DESC = """Script for finishing the step to start ONT sequencing in LIMS. diff --git a/scripts/ont_update_amount.py b/scripts/ont_update_amount.py index b0b215ca..f3c6fbfc 100644 --- a/scripts/ont_update_amount.py +++ b/scripts/ont_update_amount.py @@ -8,7 +8,7 @@ from genologics.entities import Process from genologics.lims import Lims -from scilifelab_epps.epp_utils import formula, udf_tools +from scilifelab_epps.utils import formula, udf_tools DESC = """ EPP "ONT Update Amounts". diff --git a/scripts/parse_anglerfish_results.py b/scripts/parse_anglerfish_results.py index c9856801..6fec39b5 100644 --- a/scripts/parse_anglerfish_results.py +++ b/scripts/parse_anglerfish_results.py @@ -11,7 +11,7 @@ from genologics.entities import Artifact, Process from genologics.lims import Lims -from scilifelab_epps.epp_utils import udf_tools +from scilifelab_epps.utils import udf_tools from scilifelab_epps.epp import upload_file TIMESTAMP: str = dt.now().strftime("%y%m%d_%H%M%S") diff --git a/scripts/parse_ba_results.py b/scripts/parse_ba_results.py index f19c9eac..78f96465 100644 --- a/scripts/parse_ba_results.py +++ b/scripts/parse_ba_results.py @@ -11,7 +11,7 @@ from genologics.entities import Process from genologics.lims import Lims -from scilifelab_epps.epp_utils import udf_tools +from scilifelab_epps.utils import udf_tools from scilifelab_epps.epp import get_well_number DESC = """This script parses the Agilent BioAnalyzer XML report. diff --git a/scripts/qc_amount_calculation.py b/scripts/qc_amount_calculation.py index ab119967..ab39568c 100644 --- a/scripts/qc_amount_calculation.py +++ b/scripts/qc_amount_calculation.py @@ -16,7 +16,7 @@ from genologics.entities import Process from genologics.lims import Lims -from scilifelab_epps.epp_utils import formula, udf_tools +from scilifelab_epps.utils import formula, udf_tools from scilifelab_epps.epp import EppLogger From 400488b660d52ee41bc3dc921629dfcc69f240cc Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 12:34:23 +0200 Subject: [PATCH 12/50] implement wrapper --- scripts/generate_anglerfish_samplesheet.py | 125 +++++---------------- 1 file changed, 30 insertions(+), 95 deletions(-) diff --git a/scripts/generate_anglerfish_samplesheet.py b/scripts/generate_anglerfish_samplesheet.py index e7a6f9f7..3b4cc925 100644 --- a/scripts/generate_anglerfish_samplesheet.py +++ b/scripts/generate_anglerfish_samplesheet.py @@ -4,7 +4,6 @@ import os import re import shutil -import sys from argparse import ArgumentParser from datetime import datetime as dt @@ -16,12 +15,12 @@ from data.Chromium_10X_indexes import Chromium_10X_indexes from data.ONT_barcodes import ONT_BARCODES from scilifelab_epps.epp import upload_file +from scilifelab_epps.wrapper import epp_decorator DESC = """Script to generate Anglerfish samplesheet for ONT runs. """ TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S") -SCRIPT_NAME: str = os.path.basename(__file__).split(".")[0] def generate_anglerfish_samplesheet(process): @@ -124,7 +123,35 @@ def get_adaptor_name(reagent_label: str) -> str | list[str]: ) -def main(): +@epp_decorator(script_path=__file__, timestamp=TIMESTAMP) +def main(args): + lims = Lims(BASEURI, USERNAME, PASSWORD) + process = Process(lims, id=args.pid) + + file_name = generate_anglerfish_samplesheet(process) + + logging.info("Uploading samplesheet to LIMS...") + upload_file( + file_name, + args.file, + process, + lims, + ) + + logging.info("Moving samplesheet to ngi-nas-ns...") + try: + shutil.copyfile( + file_name, + f"/srv/ngi-nas-ns/samplesheets/anglerfish/{dt.now().year}/{file_name}", + ) + os.remove(file_name) + except: + logging.error("Failed to move samplesheet to ngi-nas-ns.") + else: + logging.info("Samplesheet moved to ngi-nas-ns.") + + +if __name__ == "__main__": # Parse args parser = ArgumentParser(description=DESC) parser.add_argument( @@ -147,96 +174,4 @@ def main(): ) args = parser.parse_args() - # Set up LIMS - lims = Lims(BASEURI, USERNAME, PASSWORD) - lims.check_version() - process = Process(lims, id=args.pid) - - # Set up logging - log_filename: str = ( - "_".join( - [ - SCRIPT_NAME, - process.id, - TIMESTAMP, - process.technician.name.replace(" ", ""), - ] - ) - + ".log" - ) - - logging.basicConfig( - filename=log_filename, - filemode="w", - format="%(levelname)s: %(message)s", - level=logging.INFO, - ) - - # Start logging - logging.info(f"Script '{SCRIPT_NAME}' started at {TIMESTAMP}.") - logging.info( - f"Launched in step '{process.type.name}' ({process.id}) by {process.technician.name}." - ) - args_str = "\n\t".join([f"'{arg}': {getattr(args, arg)}" for arg in vars(args)]) - logging.info(f"Script called with arguments: \n\t{args_str}") - - try: - file_name = generate_anglerfish_samplesheet(process) - - logging.info("Uploading samplesheet to LIMS...") - upload_file( - file_name, - args.file, - process, - lims, - ) - - logging.info("Moving samplesheet to ngi-nas-ns...") - try: - shutil.copyfile( - file_name, - f"/srv/ngi-nas-ns/samplesheets/anglerfish/{dt.now().year}/{file_name}", - ) - os.remove(file_name) - except: - logging.error("Failed to move samplesheet to ngi-nas-ns.") - else: - logging.info("Samplesheet moved to ngi-nas-ns.") - - except Exception as e: - # Post error to LIMS GUI - logging.error(str(e), exc_info=True) - logging.shutdown() - upload_file( - file_path=log_filename, - file_slot=args.log, - process=process, - lims=lims, - ) - os.remove(log_filename) - sys.stderr.write(str(e)) - sys.exit(2) - else: - logging.info("") - logging.info("Script completed successfully.") - logging.shutdown() - upload_file( - file_path=log_filename, - file_slot=args.log, - process=process, - lims=lims, - ) - # Check log for errors and warnings - log_content = open(log_filename).read() - os.remove(log_filename) - if "ERROR:" in log_content or "WARNING:" in log_content: - sys.stderr.write( - "Script finished successfully, but log contains errors or warnings, please have a look." - ) - sys.exit(2) - else: - sys.exit(0) - - -if __name__ == "__main__": main() From c1867fd59bd72dfa153f80e171a498cbd783fb97 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 14:16:35 +0200 Subject: [PATCH 13/50] wrap script --- scripts/ont_send_reloading_info_to_db.py | 76 ++++-------------------- 1 file changed, 11 insertions(+), 65 deletions(-) diff --git a/scripts/ont_send_reloading_info_to_db.py b/scripts/ont_send_reloading_info_to_db.py index 2da91b0c..de8fc6d8 100644 --- a/scripts/ont_send_reloading_info_to_db.py +++ b/scripts/ont_send_reloading_info_to_db.py @@ -3,7 +3,6 @@ import logging import os import re -import sys from argparse import ArgumentParser from datetime import datetime as dt @@ -14,7 +13,7 @@ from genologics.entities import Artifact, Process from genologics.lims import Lims -from scilifelab_epps.epp import upload_file +from scilifelab_epps.wrapper import epp_decorator DESC = """Used to record the washing and reloading of ONT flow cells. @@ -22,7 +21,6 @@ """ TIMESTAMP: str = dt.now().strftime("%y%m%d_%H%M%S") -SCRIPT_NAME: str = os.path.basename(__file__).split(".")[0] def send_reloading_info_to_db(process: Process): @@ -178,71 +176,19 @@ def check_csv_udf_list(pattern: str, csv_udf_list: list[str]) -> bool: return True -def main(): - # Parse args - parser = ArgumentParser(description=DESC) - parser.add_argument("--pid", help="Lims id for current Process") - parser.add_argument("--log", type=str, help="Which log file slot to use") - args = parser.parse_args() - - # Set up LIMS +@epp_decorator(script_path=__file__, timestamp=TIMESTAMP) +def main(args): lims = Lims(BASEURI, USERNAME, PASSWORD) - lims.check_version() process = Process(lims, id=args.pid) - # Set up logging - log_filename: str = ( - "_".join( - [ - SCRIPT_NAME, - process.id, - TIMESTAMP, - process.technician.name.replace(" ", ""), - ] - ) - + ".log" - ) - - logging.basicConfig( - filename=log_filename, - filemode="w", - format="%(levelname)s: %(message)s", - level=logging.INFO, - ) - - # Start logging - logging.info(f"Script '{SCRIPT_NAME}' started at {TIMESTAMP}.") - logging.info( - f"Launched in step '{process.type.name}' ({process.id}) by {process.technician.name}." - ) - args_str = "\n\t".join([f"'{arg}': {getattr(args, arg)}" for arg in vars(args)]) - logging.info(f"Script called with arguments: \n\t{args_str}") - - try: - send_reloading_info_to_db(process) - except Exception as e: - # Post error to LIMS GUI - logging.error(e) - logging.shutdown() - upload_file( - file_path=log_filename, - file_slot=args.log, - process=process, - lims=lims, - ) - sys.stderr.write(str(e)) - sys.exit(2) - else: - logging.info("Script completed successfully.") - logging.shutdown() - upload_file( - file_path=log_filename, - file_slot=args.log, - process=process, - lims=lims, - ) - sys.exit(0) + send_reloading_info_to_db(process) if __name__ == "__main__": - main() + # Parse args + parser = ArgumentParser(description=DESC) + parser.add_argument("--pid", help="Lims id for current Process") + parser.add_argument("--log", type=str, help="Which log file slot to use") + args = parser.parse_args() + + main(args) From 199193c5129f8b6c4ae0bcdf855453927918d909 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 14:18:36 +0200 Subject: [PATCH 14/50] wrap script --- scripts/ont_sync_to_db.py | 78 +++++++-------------------------------- 1 file changed, 13 insertions(+), 65 deletions(-) diff --git a/scripts/ont_sync_to_db.py b/scripts/ont_sync_to_db.py index 29367148..9b787c83 100644 --- a/scripts/ont_sync_to_db.py +++ b/scripts/ont_sync_to_db.py @@ -3,7 +3,6 @@ import logging import os import re -import sys from argparse import ArgumentParser, Namespace from datetime import datetime as dt @@ -19,7 +18,7 @@ from ont_send_reloading_info_to_db import get_ONT_db from scilifelab_epps.utils import udf_tools -from scilifelab_epps.epp import upload_file +from scilifelab_epps.wrapper import epp_decorator DESC = """Script for finishing the step to start ONT sequencing in LIMS. @@ -28,7 +27,6 @@ """ TIMESTAMP: str = dt.now().strftime("%y%m%d_%H%M%S") -SCRIPT_NAME: str = os.path.basename(__file__).split(".")[0] def assert_samplesheet(process: Process, args: Namespace, lims: Lims): @@ -241,7 +239,17 @@ def sync_runs_to_db(process: Process, args: Namespace, lims: Lims): ) -def main(): +@epp_decorator(script_path=__file__, timestamp=TIMESTAMP) +def main(args): + # Set up LIMS + lims = Lims(BASEURI, USERNAME, PASSWORD) + lims.check_version() + process = Process(lims, id=args.pid) + + sync_runs_to_db(process=process, lims=lims, args=args) + + +if __name__ == "__main__": # Parse args parser = ArgumentParser(description=DESC) parser.add_argument( @@ -263,64 +271,4 @@ def main(): ) args: Namespace = parser.parse_args() - # Set up LIMS - lims = Lims(BASEURI, USERNAME, PASSWORD) - lims.check_version() - process = Process(lims, id=args.pid) - - # Set up logging - log_filename: str = ( - "_".join( - [ - SCRIPT_NAME, - process.id, - TIMESTAMP, - process.technician.name.replace(" ", ""), - ] - ) - + ".log" - ) - - logging.basicConfig( - filename=log_filename, - filemode="w", - format="%(filename)s - %(funcName)s - %(levelname)s - %(message)s", - level=logging.INFO, - ) - - # Start logging - logging.info(f"Script '{SCRIPT_NAME}' started at {TIMESTAMP}.") - logging.info( - f"Launched in step '{process.type.name}' ({process.id}) by {process.technician.name}." - ) - args_str = "\n\t".join([f"'{arg}': {getattr(args, arg)}" for arg in vars(args)]) - logging.info(f"Script called with arguments: \n\t{args_str}") - - try: - sync_runs_to_db(process=process, lims=lims, args=args) - except Exception as e: - # Post error to LIMS GUI - logging.error(e, exc_info=True) - logging.shutdown() - upload_file( - file_path=log_filename, - file_slot=args.log, - process=process, - lims=lims, - ) - sys.stderr.write(str(e)) - sys.exit(2) - else: - logging.info("Script completed successfully.") - logging.shutdown() - upload_file( - file_path=log_filename, - file_slot=args.log, - process=process, - lims=lims, - ) - sys.exit(0) - - -if __name__ == "__main__": - main() + main(args) From 665a3cf1a382a33c318d933bc15a99da1884ab15 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 14:21:00 +0200 Subject: [PATCH 15/50] wrap script --- scripts/parse_anglerfish_results.py | 77 +++++------------------------ 1 file changed, 12 insertions(+), 65 deletions(-) diff --git a/scripts/parse_anglerfish_results.py b/scripts/parse_anglerfish_results.py index 6fec39b5..13797087 100644 --- a/scripts/parse_anglerfish_results.py +++ b/scripts/parse_anglerfish_results.py @@ -2,7 +2,6 @@ import glob import logging import os -import sys from argparse import ArgumentParser from datetime import datetime as dt @@ -12,10 +11,9 @@ from genologics.lims import Lims from scilifelab_epps.utils import udf_tools -from scilifelab_epps.epp import upload_file +from scilifelab_epps.wrapper import epp_decorator TIMESTAMP: str = dt.now().strftime("%y%m%d_%H%M%S") -SCRIPT_NAME: str = os.path.basename(__file__).split(".")[0] def find_run(process: Process) -> str: @@ -197,7 +195,16 @@ def parse_anglerfish_results(process, lims): fill_udfs(process, df_parsed) -def main(): +@epp_decorator(script_path=__file__, timestamp=TIMESTAMP) +def main(args): + # Set up LIMS + lims = Lims(BASEURI, USERNAME, PASSWORD) + process = Process(lims, id=args.pid) + + parse_anglerfish_results(process, lims) + + +if __name__ == "__main__": # Parse args parser = ArgumentParser() parser.add_argument( @@ -217,64 +224,4 @@ def main(): ) args = parser.parse_args() - # Set up LIMS - lims = Lims(BASEURI, USERNAME, PASSWORD) - lims.check_version() - process = Process(lims, id=args.pid) - - # Set up logging - log_filename = ( - "_".join( - [ - SCRIPT_NAME, - process.id, - TIMESTAMP, - process.technician.name.replace(" ", ""), - ] - ) - + ".log" - ) - - logging.basicConfig( - filename=log_filename, - filemode="w", - format="%(levelname)s: %(message)s", - level=logging.INFO, - ) - - # Start logging - logging.info(f"Script '{SCRIPT_NAME}' started at {TIMESTAMP}.") - logging.info( - f"Launched in step '{process.type.name}' ({process.id}) by {process.technician.name}." - ) - args_str = "\n\t".join([f"'{arg}': {getattr(args, arg)}" for arg in vars(args)]) - logging.info(f"Script called with arguments: \n\t{args_str}") - - try: - parse_anglerfish_results(process, lims) - except Exception as e: - # Post error to LIMS GUI - logging.error(e, exc_info=True) - logging.shutdown() - upload_file( - file_path=log_filename, - file_slot=args.log, - process=process, - lims=lims, - ) - sys.stderr.write(str(e)) - sys.exit(2) - else: - logging.info("Script completed successfully.") - logging.shutdown() - upload_file( - file_path=log_filename, - file_slot=args.log, - process=process, - lims=lims, - ) - sys.exit(0) - - -if __name__ == "__main__": - main() + main(args) From bf7db1fc2bfa126f1600bed2e501afe4e4b54f89 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 14:31:18 +0200 Subject: [PATCH 16/50] wrap script --- scripts/calc_from_args.py | 95 +++++++-------------------------------- 1 file changed, 15 insertions(+), 80 deletions(-) diff --git a/scripts/calc_from_args.py b/scripts/calc_from_args.py index d5f73932..82332b9c 100644 --- a/scripts/calc_from_args.py +++ b/scripts/calc_from_args.py @@ -1,7 +1,4 @@ #!/usr/bin/env python -import logging -import os -import sys from argparse import ArgumentParser from datetime import datetime as dt @@ -10,7 +7,7 @@ from genologics.lims import Lims from scilifelab_epps.calc_from_args import calculation_methods -from scilifelab_epps.epp import upload_file +from scilifelab_epps.wrapper import epp_decorator DESC = """UDF-agnostic script to perform calculations across all artifacts of a step. @@ -20,7 +17,6 @@ """ TIMESTAMP: str = dt.now().strftime("%y%m%d_%H%M%S") -SCRIPT_NAME: str = os.path.basename(__file__).split(".")[0] def parse_udf_arg(arg_string: str) -> dict: @@ -71,7 +67,8 @@ def parse_udf_arg(arg_string: str) -> dict: return arg_dict -def main(): +@epp_decorator(script_path=__file__, timestamp=TIMESTAMP) +def main(args): f"""Set up log, LIMS instance and parse args. Example 1: @@ -124,6 +121,15 @@ def main(): """ + # Set up LIMS + lims = Lims(BASEURI, USERNAME, PASSWORD) + process = Process(lims, id=args.pid) + + function_to_use = getattr(calculation_methods, args.calc) + function_to_use(process, args) + + +if __name__ == "__main__": # Parse args parser = ArgumentParser(description=DESC) parser.add_argument("--pid", type=str, help="Lims ID for current Process") @@ -134,6 +140,7 @@ def main(): help="Which function to use for calculations", ) parser.add_argument("--log", type=str, help="Which log file slot to use") + # UDFs to use for calculations udf_args = [ "vol_in", @@ -146,79 +153,7 @@ def main(): ] for udf_arg in udf_args: parser.add_argument(f"--{udf_arg}", type=parse_udf_arg) - args = parser.parse_args() - - # Set up LIMS - lims = Lims(BASEURI, USERNAME, PASSWORD) - lims.check_version() - process = Process(lims, id=args.pid) - - # Set up logging - log_filename: str = ( - "_".join( - [ - SCRIPT_NAME, - args.calc, - process.id, - TIMESTAMP, - process.technician.name.replace(" ", ""), - ] - ) - + ".log" - ) - - logging.basicConfig( - filename=log_filename, - filemode="w", - format="%(levelname)s: %(message)s", - level=logging.INFO, - ) - - # Start logging - logging.info(f"Script '{SCRIPT_NAME}' started at {TIMESTAMP}.") - logging.info( - f"Launched in step '{process.type.name}' ({process.id}) by {process.technician.name}." - ) - args_str = "\n\t".join([f"'{arg}': {getattr(args, arg)}" for arg in vars(args)]) - logging.info(f"Script called with arguments: \n\t{args_str}") - - try: - function_to_use = getattr(calculation_methods, args.calc) - function_to_use(process, args) - except Exception as e: - # Post error to LIMS GUI - logging.error(str(e), exc_info=True) - logging.shutdown() - upload_file( - file_path=log_filename, - file_slot=args.log, - process=process, - lims=lims, - remove=True, - ) - sys.stderr.write(str(e)) - sys.exit(2) - else: - logging.info("") - logging.info("Script completed successfully.") - logging.shutdown() - log_content = open(log_filename).read() - upload_file( - file_path=log_filename, - file_slot=args.log, - process=process, - lims=lims, - remove=True, - ) - # Check log for errors and warnings - if "ERROR:" in log_content or "WARNING:" in log_content: - sys.stderr.write( - "Script finished successfully, but log contains errors or warnings, please have a look." - ) - sys.exit(2) - else: - sys.exit(0) + args = parser.parse_args() -if __name__ == "__main__": - main() + main(args) From 30feb204e292e17e1b2b1d4a23ee1681090c8887 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 14:36:54 +0200 Subject: [PATCH 17/50] fix zika references --- scilifelab_epps/zika/methods.py | 54 ++++++++++++++++----------------- scripts/ont_pool.py | 2 +- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/scilifelab_epps/zika/methods.py b/scilifelab_epps/zika/methods.py index 77c24e23..7c596454 100644 --- a/scilifelab_epps/zika/methods.py +++ b/scilifelab_epps/zika/methods.py @@ -11,8 +11,8 @@ import numpy as np import pandas as pd +from scilifelab_epps import zika from scilifelab_epps.utils.udf_tools import is_filled -from scripts import zika_utils def pool_fixed_vol( @@ -57,7 +57,7 @@ def pool_fixed_vol( "dst_id": "art_tuple[1]['uri'].location[0].id", "dst_well": "art_tuple[1]['uri'].location[1]", } - df_all = zika_utils.fetch_sample_data(currentStep, to_fetch) + df_all = zika.utils.fetch_sample_data(currentStep, to_fetch) # Define deck, a dictionary mapping plate names to deck positions assert len(df_all.src_id.unique()) <= 4, "Only one to four input plates allowed" @@ -77,22 +77,22 @@ def pool_fixed_vol( df_wl = pd.concat([df_wl, df_pool], axis=0) # Format worklist - df_formatted = zika_utils.format_worklist(df_wl.copy(), deck) - wl_filename, log_filename = zika_utils.get_filenames( + df_formatted = zika.utils.format_worklist(df_wl.copy(), deck) + wl_filename, log_filename = zika.utils.get_filenames( method_name="pool", pid=currentStep.id ) # Write the output files - zika_utils.write_worklist( + zika.utils.write_worklist( df=df_formatted.copy(), deck=deck, wl_filename=wl_filename, ) - zika_utils.write_log(log, log_filename) + zika.utils.write_log(log, log_filename) # Upload files - zika_utils.upload_csv(currentStep, lims, wl_filename) - zika_utils.upload_log(currentStep, lims, log_filename) + zika.utils.upload_csv(currentStep, lims, wl_filename) + zika.utils.upload_log(currentStep, lims, log_filename) # Issue warnings, if any if any("WARNING" in entry for entry in log): @@ -187,7 +187,7 @@ def pool( if v: to_fetch[k] = f"art_tuple[1]['uri'].udf['{v}']" - df_all = zika_utils.fetch_sample_data(currentStep, to_fetch) + df_all = zika.utils.fetch_sample_data(currentStep, to_fetch) # All samples should have accessible volume assert all( @@ -321,7 +321,7 @@ def pool( ) errors = True - raise zika_utils.VolumeOverflow + raise zika.utils.VolumeOverflow log.append( "\nAn even pool can be created within the following parameter ranges:" @@ -409,7 +409,7 @@ def pool( ) errors = True - raise zika_utils.VolumeOverflow + raise zika.utils.VolumeOverflow log.append( "\nWill try to create a pool that is as even as possible. Accounting for sample depletion, a pool can be created with the following parameter ranges: " @@ -436,7 +436,7 @@ def pool( # No volume expansion is allowed, so pool volume is set to the minimum, given the conc pool_vol = pool_real_min_sample_vol - except zika_utils.VolumeOverflow: + except zika.utils.VolumeOverflow: continue # === STORE FINAL CALCULATION RESULTS === @@ -518,14 +518,14 @@ def pool( pool.put() # Get filenames and upload log if errors - wl_filename, log_filename = zika_utils.get_filenames( + wl_filename, log_filename = zika.utils.get_filenames( method_name="pool", pid=currentStep.id ) if errors: - raise zika_utils.CheckLog(log, log_filename, lims, currentStep) + raise zika.utils.CheckLog(log, log_filename, lims, currentStep) # Format worklist - df_formatted = zika_utils.format_worklist(df_wl.copy(), deck) + df_formatted = zika.utils.format_worklist(df_wl.copy(), deck) # Comments to attach to the worklist header comments = [ @@ -539,17 +539,17 @@ def pool( ) # Write the output files - zika_utils.write_worklist( + zika.utils.write_worklist( df=df_formatted.copy(), deck=deck, wl_filename=wl_filename, comments=comments, ) - zika_utils.write_log(log, log_filename) + zika.utils.write_log(log, log_filename) # Upload files - zika_utils.upload_csv(currentStep, lims, wl_filename) - zika_utils.upload_log(currentStep, lims, log_filename) + zika.utils.upload_csv(currentStep, lims, wl_filename) + zika.utils.upload_log(currentStep, lims, log_filename) # Issue warnings, if any if any("WARNING" in entry for entry in log): @@ -656,7 +656,7 @@ def norm( if v: to_fetch[k] = f"art_tuple[1]['uri'].udf['{v}']" - df = zika_utils.fetch_sample_data(currentStep, to_fetch) + df = zika.utils.fetch_sample_data(currentStep, to_fetch) conc_unit = "ng/ul" if use_customer_metrics else df.conc_units[0] amt_unit = "ng" if conc_unit == "ng/ul" else "fmol" @@ -789,34 +789,34 @@ def norm( wl_comments = [] # Resolve buffer transfers - df_buffer, wl_comments = zika_utils.resolve_buffer_transfers( + df_buffer, wl_comments = zika.utils.resolve_buffer_transfers( df=df.copy(), wl_comments=wl_comments ) # Format worklist - df_formatted = zika_utils.format_worklist(df_buffer.copy(), deck=deck) + df_formatted = zika.utils.format_worklist(df_buffer.copy(), deck=deck) wl_comments.append( f"This worklist will enact normalization of {len(df)} samples. For detailed parameters see the worklist log" ) # Write files - wl_filename, log_filename = zika_utils.get_filenames( + wl_filename, log_filename = zika.utils.get_filenames( method_name="norm", pid=currentStep.id ) - zika_utils.write_worklist( + zika.utils.write_worklist( df=df_formatted.copy(), deck=deck, wl_filename=wl_filename, comments=wl_comments, ) - zika_utils.write_log(log, log_filename) + zika.utils.write_log(log, log_filename) # Upload files - zika_utils.upload_csv(currentStep, lims, wl_filename) - zika_utils.upload_log(currentStep, lims, log_filename) + zika.utils.upload_csv(currentStep, lims, wl_filename) + zika.utils.upload_log(currentStep, lims, log_filename) # Issue warnings, if any if any("WARNING" in entry for entry in log): diff --git a/scripts/ont_pool.py b/scripts/ont_pool.py index 076dc8e4..5835d31d 100644 --- a/scripts/ont_pool.py +++ b/scripts/ont_pool.py @@ -9,9 +9,9 @@ from genologics.lims import Lims from numpy import minimum from tabulate import tabulate -from zika_utils import fetch_sample_data from scilifelab_epps.utils import formula +from scilifelab_epps.zika.utils import fetch_sample_data DESC = """ EPP "ONT pooling", file slot "ONT pooling log". From 774f3313338f631098dc69d7f50ec736acf5b95a Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 14:37:54 +0200 Subject: [PATCH 18/50] ruff safe check --- scripts/molar_concentration.py | 2 +- scripts/parse_ba_results.py | 2 +- scripts/qc_amount_calculation.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/molar_concentration.py b/scripts/molar_concentration.py index e7603da0..57af5238 100644 --- a/scripts/molar_concentration.py +++ b/scripts/molar_concentration.py @@ -16,8 +16,8 @@ from genologics.entities import Process from genologics.lims import Lims -from scilifelab_epps.utils.formula import ng_ul_to_nM from scilifelab_epps.epp import EppLogger +from scilifelab_epps.utils.formula import ng_ul_to_nM def apply_calculations(lims, artifacts, conc_udf, size_udf, unit_udf, epp_logger): diff --git a/scripts/parse_ba_results.py b/scripts/parse_ba_results.py index 78f96465..3f437148 100644 --- a/scripts/parse_ba_results.py +++ b/scripts/parse_ba_results.py @@ -11,8 +11,8 @@ from genologics.entities import Process from genologics.lims import Lims -from scilifelab_epps.utils import udf_tools from scilifelab_epps.epp import get_well_number +from scilifelab_epps.utils import udf_tools DESC = """This script parses the Agilent BioAnalyzer XML report. diff --git a/scripts/qc_amount_calculation.py b/scripts/qc_amount_calculation.py index ab39568c..2e258259 100644 --- a/scripts/qc_amount_calculation.py +++ b/scripts/qc_amount_calculation.py @@ -16,8 +16,8 @@ from genologics.entities import Process from genologics.lims import Lims -from scilifelab_epps.utils import formula, udf_tools from scilifelab_epps.epp import EppLogger +from scilifelab_epps.utils import formula, udf_tools def apply_calculations(artifacts, udf1, op, udf2, unit_amount_map, process): From 0cd452c37913d5ba252ab5e1afbc9ea44b2cee84 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 14:44:50 +0200 Subject: [PATCH 19/50] bump vlog --- VERSIONLOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/VERSIONLOG.md b/VERSIONLOG.md index 8c5d0a20..9d1d3303 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,5 +1,9 @@ # Scilifelab_epps Version Log +## 20240820.1 + +Re-organize repo to follow best-practice modularization and implement EPP wrapper. + ## 20240816.1 Set up fixed-volume pooling by Zika for no-QC libraries. From 8c2991b73a047e363d861a3daf9444e146d488e8 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 16:10:38 +0200 Subject: [PATCH 20/50] aviti dev init --- scripts/generate_aviti_run_manifest.py | 53 ++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 scripts/generate_aviti_run_manifest.py diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py new file mode 100644 index 00000000..33da90fb --- /dev/null +++ b/scripts/generate_aviti_run_manifest.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +import logging +import os +import re +import shutil +from argparse import ArgumentParser +from datetime import datetime as dt + +from genologics.config import BASEURI, PASSWORD, USERNAME +from genologics.entities import Process +from genologics.lims import Lims + +from scilifelab_epps.wrapper import epp_decorator + +DESC = """Script to generate Anglerfish samplesheet for ONT runs. +""" + +TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S") + + +@epp_decorator(script_path=__file__, timestamp=TIMESTAMP) +def main(args): + lims = Lims(BASEURI, USERNAME, PASSWORD) + process = Process(lims, id=args.pid) + + pass + + +if __name__ == "__main__": + # Parse args + parser = ArgumentParser(description=DESC) + parser.add_argument( + "--pid", + required=True, + type=str, + help="Lims ID for current Process.", + ) + parser.add_argument( + "--log", + required=True, + type=str, + help="Which file slot to use for the script log.", + ) + parser.add_argument( + "--file", + required=True, + type=str, + help="Which file slot to use for the run manifest.", + ) + args = parser.parse_args() + + main() From 58fe48c5638ce6b5a5a7924f526fd5300e9d1de5 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 16:11:36 +0200 Subject: [PATCH 21/50] bugfix --- scripts/generate_aviti_run_manifest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 33da90fb..b59da68a 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -50,4 +50,4 @@ def main(args): ) args = parser.parse_args() - main() + main(args) From 1cf5a67d9463a03c10093a943ea99b7f20aac685 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 17:42:45 +0200 Subject: [PATCH 22/50] and i OOP --- scripts/generate_aviti_run_manifest.py | 72 ++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 3 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index b59da68a..8afb4990 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -1,10 +1,8 @@ #!/usr/bin/env python import logging -import os -import re -import shutil from argparse import ArgumentParser +from dataclasses import dataclass, field from datetime import datetime as dt from genologics.config import BASEURI, PASSWORD, USERNAME @@ -19,11 +17,79 @@ TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S") +class Row: + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + def write(self, f): + for attr in self.__dict__.values(): + if isinstance(attr, str) and "," in attr: + f.write(f'"{attr}", ') + else: + f.write(f"{attr}, ") + f.write("\n") + + +class Section: + def __init__(self) -> None: + self.rows: list[Row] = [] + + def add(self, row: Row): + self.rows.append(row) + + def write(self, f): + f.write(f"{self.mark_start}\n") + for row in self.rows: + row.write(f) + f.write("\n") + + +class RunValues(Section): + def __init__(self) -> None: + super().__init__() + self.mark_start: str = "[Run Values]" + self.cols: list[str] = ["KeyName", "Value"] + + +class Settings(Section): + def __init__(self) -> None: + super().__init__() + self.mark_start: str = "[Settings]" + self.cols: list[str] = ["SettingName", "Value"] + + +class Samples(Section): + def __init__(self) -> None: + super().__init__() + self.mark_start: str = "[Samples]" + self.cols: list[str] = [ + "SampleName", + "Index1", + "Index2", + "Lane", + "Project", + "ExternalID", + ] + + +class Manifest: + def __init__(self) -> None: + self.sections: list[Section] = [RunValues(), Settings(), Samples()] + + def write(self, file_path: str): + with open(file_path, "w") as f: + for section in self.sections: + section.write(f) + + @epp_decorator(script_path=__file__, timestamp=TIMESTAMP) def main(args): lims = Lims(BASEURI, USERNAME, PASSWORD) process = Process(lims, id=args.pid) + logging.info("Starting to build run manifest.") + pass From cec8ace8632565cacd9e465187337ecf5ef8dda3 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 20 Aug 2024 17:56:15 +0200 Subject: [PATCH 23/50] use dataclass to get __repr__ for FREE --- scripts/generate_aviti_run_manifest.py | 36 ++++++++++++++------------ 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 8afb4990..522287ec 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -17,6 +17,7 @@ TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S") +@dataclass class Row: def __init__(self, **kwargs): for k, v in kwargs.items(): @@ -31,9 +32,9 @@ def write(self, f): f.write("\n") +@dataclass class Section: - def __init__(self) -> None: - self.rows: list[Row] = [] + rows: list[Row] = field(default_factory=list) def add(self, row: Row): self.rows.append(row) @@ -45,25 +46,23 @@ def write(self, f): f.write("\n") +@dataclass class RunValues(Section): - def __init__(self) -> None: - super().__init__() - self.mark_start: str = "[Run Values]" - self.cols: list[str] = ["KeyName", "Value"] + mark_start: str = "[Run Values]" + cols: list[str] = field(default_factory=lambda: ["KeyName", "Value"]) +@dataclass class Settings(Section): - def __init__(self) -> None: - super().__init__() - self.mark_start: str = "[Settings]" - self.cols: list[str] = ["SettingName", "Value"] + mark_start: str = "[Settings]" + cols: list[str] = field(default_factory=lambda: ["SettingName", "Value"]) +@dataclass class Samples(Section): - def __init__(self) -> None: - super().__init__() - self.mark_start: str = "[Samples]" - self.cols: list[str] = [ + mark_start: str = "[Samples]" + cols: list[str] = field( + default_factory=lambda: [ "SampleName", "Index1", "Index2", @@ -71,15 +70,18 @@ def __init__(self) -> None: "Project", "ExternalID", ] + ) +@dataclass class Manifest: - def __init__(self) -> None: - self.sections: list[Section] = [RunValues(), Settings(), Samples()] + runvalues: RunValues = RunValues() + settings: Settings = Settings() + samples: Samples = Samples() def write(self, file_path: str): with open(file_path, "w") as f: - for section in self.sections: + for section in [self.runvalues, self.settings, self.samples]: section.write(f) From 7602cb89cd2dedc1279cb86bdf2679bb9c0157fc Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 21 Aug 2024 11:51:40 +0200 Subject: [PATCH 24/50] read sample info --- scripts/generate_aviti_run_manifest.py | 57 +++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 522287ec..b38585fb 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -5,14 +5,13 @@ from dataclasses import dataclass, field from datetime import datetime as dt +import pandas as pd from genologics.config import BASEURI, PASSWORD, USERNAME from genologics.entities import Process from genologics.lims import Lims from scilifelab_epps.wrapper import epp_decorator - -DESC = """Script to generate Anglerfish samplesheet for ONT runs. -""" +from scripts.generate_minknow_samplesheet import get_pool_sample_label_mapping TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S") @@ -40,7 +39,8 @@ def add(self, row: Row): self.rows.append(row) def write(self, f): - f.write(f"{self.mark_start}\n") + f.write(self.mark_start + "\n") + f.write(", ".join(self.cols) + "\n") for row in self.rows: row.write(f) f.write("\n") @@ -92,12 +92,57 @@ def main(args): logging.info("Starting to build run manifest.") - pass + # Get the analytes placed into the flowcell + arts_out = [op for op in process.all_outputs() if op.type == "Analyte"] + + # Iterate over pools + rows = [] + for art_out in arts_out: + assert ( + art_out.container.type.name == "AVITI Flow Cell" + ), "Unsupported container type." + assert ( + len(art_out.samples) > 1 and len(art_out.reagent_labels) > 1 + ), "Not a pool." + assert len(art_out.samples) == len( + art_out.reagent_labels + ), "Unequal number of samples and reagent labels." + + lane: str = art_out.location[1].split(":")[1] + sample2label: dict[str, str] = get_pool_sample_label_mapping(art_out) + samples = art_out.samples + labels = art_out.reagent_labels + + assert len(labels.unique()) == len( + labels + ), "Detected non-unique reagent labels." + + # Iterate over samples + + for sample in samples: + lims_label = sample2label[sample.name] + + if "-" in lims_label: + index1, index2 = lims_label.split("-") + else: + index1 = lims_label + index2 = None + + row = {} + row["SampleName"] = sample.name + row["Index1"] = index1 + row["Index2"] = index2 + row["Lane"] = lane + + rows.append(row) + + df = pd.DataFrame(rows) + samples = f"[Samples]\n{df.to_csv(index=None, header=True)}" if __name__ == "__main__": # Parse args - parser = ArgumentParser(description=DESC) + parser = ArgumentParser() parser.add_argument( "--pid", required=True, From b74cc367bc08a46043d9a14c4676ceb4d48c693e Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 21 Aug 2024 15:51:00 +0200 Subject: [PATCH 25/50] wip, samples section seems done --- scripts/generate_aviti_run_manifest.py | 96 +++++++++++++++++++++----- 1 file changed, 80 insertions(+), 16 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index b38585fb..0f8056e2 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import logging -from argparse import ArgumentParser +from argparse import ArgumentParser, Namespace from dataclasses import dataclass, field from datetime import datetime as dt @@ -85,19 +85,21 @@ def write(self, file_path: str): section.write(f) -@epp_decorator(script_path=__file__, timestamp=TIMESTAMP) -def main(args): - lims = Lims(BASEURI, USERNAME, PASSWORD) - process = Process(lims, id=args.pid) - - logging.info("Starting to build run manifest.") +def get_samples_section(process: Process) -> str: + """Generate the [Samples] section of the AVITI run manifest and return it as a string.""" # Get the analytes placed into the flowcell arts_out = [op for op in process.all_outputs() if op.type == "Analyte"] + # Assert that both flowcell lanes are filled + assert set([art_out.location[1].split(":")[1] for art_out in arts_out]) == set( + ["1", "2"] + ), "Expected two populated lanes." + # Iterate over pools - rows = [] + all_rows = [] for art_out in arts_out: + lane_rows = [] assert ( art_out.container.type.name == "AVITI Flow Cell" ), "Unsupported container type." @@ -107,18 +109,14 @@ def main(args): assert len(art_out.samples) == len( art_out.reagent_labels ), "Unequal number of samples and reagent labels." - lane: str = art_out.location[1].split(":")[1] sample2label: dict[str, str] = get_pool_sample_label_mapping(art_out) samples = art_out.samples labels = art_out.reagent_labels - assert len(labels.unique()) == len( - labels - ), "Detected non-unique reagent labels." + assert len(set(labels)) == len(labels), "Detected non-unique reagent labels." # Iterate over samples - for sample in samples: lims_label = sample2label[sample.name] @@ -134,10 +132,76 @@ def main(args): row["Index2"] = index2 row["Lane"] = lane - rows.append(row) + lane_rows.append(row) + + # Add PhiX controls + for phix_idx_pair in [ + ("ACGTGTAGC", "GCTAGTGCA"), + ("CACATGCTG", "AGACACTGT"), + ("GTACACGAT", "CTCGTACAG"), + ("TGTGCATCA", "TAGTCGATC"), + ]: + row = {} + row["SampleName"] = "PhiX" + row["Index1"] = phix_idx_pair[0] + row["Index2"] = phix_idx_pair[1] + row["Lane"] = lane + lane_rows.append(row) + + # Check for index collision within lane, across samples and PhiX + check_index_collision(lane_rows) + all_rows.extend(lane_rows) + + df = pd.DataFrame(all_rows) + + samples_section = f"[Samples]\n{df.to_csv(index=None, header=True)}" + + return samples_section + + +def revcomp(seq: str) -> str: + """Reverse-complement a DNA string.""" + return seq.translate(str.maketrans("ACGT", "TGCA"))[::-1] + + +def check_index_collision(rows: list[dict]) -> None: + """Directionality-agnostic index collision checker.""" + + def idx_combinations(idx1: str, idx2: str | None) -> list[str]: + """Given one or two indices, return all possible reverse-complement combinations.""" + if idx2 is None: + return [idx1, revcomp(idx1)] + else: + return [ + idx1 + idx2, + idx1 + revcomp(idx2), + revcomp(idx1) + idx2, + revcomp(idx1) + revcomp(idx2), + ] + + for i in range(len(rows)): + row = rows[i] + idxs = idx_combinations(row["Index1"], row["Index2"]) + + for row_comp in rows[i + 1 :]: + idxs_comp = idx_combinations(row_comp["Index1"], row_comp["Index2"]) + + if any(idx in idxs_comp for idx in idxs): + raise ValueError( + "Index collision detected between" + + f" {row['SampleName']} ({row['Index1']}-{row['Index2']}) and" + + f" {row_comp['SampleName']} ({row_comp['Index1']}-{row_comp['Index2']})." + ) + + +@epp_decorator(script_path=__file__, timestamp=TIMESTAMP) +def main(args: Namespace): + lims = Lims(BASEURI, USERNAME, PASSWORD) + process = Process(lims, id=args.pid) + + logging.info("Starting to build run manifest.") - df = pd.DataFrame(rows) - samples = f"[Samples]\n{df.to_csv(index=None, header=True)}" + samples_section = get_samples_section(process) if __name__ == "__main__": From 20731398ee85283c21d576af49ed973aaa39c518 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 21 Aug 2024 15:52:24 +0200 Subject: [PATCH 26/50] add todo --- scripts/generate_aviti_run_manifest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 0f8056e2..68e4b9be 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -120,6 +120,8 @@ def get_samples_section(process: Process) -> str: for sample in samples: lims_label = sample2label[sample.name] + # TODO add code here to parse reagent labels that do not only consist of sequences and dashes + if "-" in lims_label: index1, index2 = lims_label.split("-") else: From dda8fda245c56013c6390861da6a76c765826c50 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 21 Aug 2024 16:27:42 +0200 Subject: [PATCH 27/50] add levenshtein module for edit distance --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index f3d74068..8cd3ba49 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ genologics google_api_python_client httplib2 interop +levenshtein Markdown numpy oauth2client From f02dadda0b03021e8951d1b730bb72a101c38670 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 21 Aug 2024 16:27:55 +0200 Subject: [PATCH 28/50] checkpoint, probably broken --- scripts/generate_aviti_run_manifest.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 68e4b9be..3d9c28b2 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -9,6 +9,7 @@ from genologics.config import BASEURI, PASSWORD, USERNAME from genologics.entities import Process from genologics.lims import Lims +from Levenshtein import distance from scilifelab_epps.wrapper import epp_decorator from scripts.generate_minknow_samplesheet import get_pool_sample_label_mapping @@ -166,7 +167,7 @@ def revcomp(seq: str) -> str: return seq.translate(str.maketrans("ACGT", "TGCA"))[::-1] -def check_index_collision(rows: list[dict]) -> None: +def check_index_collision(rows: list[dict], warning_dist: int = 3) -> None: """Directionality-agnostic index collision checker.""" def idx_combinations(idx1: str, idx2: str | None) -> list[str]: @@ -188,6 +189,26 @@ def idx_combinations(idx1: str, idx2: str | None) -> list[str]: for row_comp in rows[i + 1 :]: idxs_comp = idx_combinations(row_comp["Index1"], row_comp["Index2"]) + for idx in idxs: + for idx_comp in idxs_comp: + dist = distance(idx, idx_comp) + if dist <= warning_dist: + warning = "\n".join( + [ + f"Edit distance between {row['SampleName']} and {row_comp['SampleName']} indices is {dist}.", + f" The warning threshold is {warning_dist}.", + "Supplied indexes:", + f" {row['SampleName']}: {row['Index1']}-{row['Index2']}", + f" {row_comp['SampleName']}: {row_comp['Index1']}-{row_comp['Index2']}", + "Comparison:", + f" {row['SampleName']}: {idx}", + f" {row_comp['SampleName']}: {idx_comp}", + ] + ) + logging.warning(warning) + # TODO + print(warning) + if any(idx in idxs_comp for idx in idxs): raise ValueError( "Index collision detected between" From 94f9cf870b04c722c71c3e22dddd4c9c0a49a71e Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 21 Aug 2024 18:18:52 +0200 Subject: [PATCH 29/50] made two functions to very thoroughly check distance, maybe too thorough. Up for discussion. --- scripts/generate_aviti_run_manifest.py | 134 +++++++++++++++++-------- 1 file changed, 90 insertions(+), 44 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 3d9c28b2..63bf6a88 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -9,7 +9,7 @@ from genologics.config import BASEURI, PASSWORD, USERNAME from genologics.entities import Process from genologics.lims import Lims -from Levenshtein import distance +from Levenshtein import hamming as distance from scilifelab_epps.wrapper import epp_decorator from scripts.generate_minknow_samplesheet import get_pool_sample_label_mapping @@ -127,7 +127,7 @@ def get_samples_section(process: Process) -> str: index1, index2 = lims_label.split("-") else: index1 = lims_label - index2 = None + index2 = "" row = {} row["SampleName"] = sample.name @@ -152,7 +152,7 @@ def get_samples_section(process: Process) -> str: lane_rows.append(row) # Check for index collision within lane, across samples and PhiX - check_index_collision(lane_rows) + check_distances(lane_rows) all_rows.extend(lane_rows) df = pd.DataFrame(all_rows) @@ -167,54 +167,100 @@ def revcomp(seq: str) -> str: return seq.translate(str.maketrans("ACGT", "TGCA"))[::-1] -def check_index_collision(rows: list[dict], warning_dist: int = 3) -> None: - """Directionality-agnostic index collision checker.""" +def check_pair_distance(row, row_comp, dist_warning_threshold: int = 2): + """Directionality-agnostic distance check between two index pairs.""" - def idx_combinations(idx1: str, idx2: str | None) -> list[str]: - """Given one or two indices, return all possible reverse-complement combinations.""" - if idx2 is None: - return [idx1, revcomp(idx1)] - else: - return [ - idx1 + idx2, - idx1 + revcomp(idx2), - revcomp(idx1) + idx2, - revcomp(idx1) + revcomp(idx2), + def get_index_combos(row): + return set( + [ + row["Index1"] + row["Index2"], + row["Index1"] + revcomp(row["Index2"]), + revcomp(row["Index1"]) + row["Index2"], + revcomp(row["Index1"]) + revcomp(row["Index2"]), ] + ) + + row_combos = get_index_combos(row) + row_comp_combos = get_index_combos(row_comp) + + for row_combo in row_combos: + for row_comp_combo in row_comp_combos: + dist = distance(row_combo, row_comp_combo) + + if dist <= dist_warning_threshold: + warning = "\n".join( + [ + f"Edit distance between {row['SampleName']} and {row_comp['SampleName']} indices is {dist}.", + f" The warning threshold is {dist_warning_threshold}.", + "Supplied indexes:", + f" {row['SampleName']}: {row['Index1']}-{row['Index2']}", + f" {row_comp['SampleName']}: {row_comp['Index1']}-{row_comp['Index2']}", + "Comparison:", + f" {row['SampleName']}: {row_combo}", + f" {row_comp['SampleName']}: {row_comp_combo}", + ] + ) + logging.warning(warning) + if dist == 0: + raise AssertionError("Index collision detected.") + + +def check_pair_distance_new(row, row_comp, dist_warning_threshold: int = 2): + """Directionality-agnostic distance check between two index pairs.""" + dists = [] + for a1, _a1 in zip( + [row["Index1"], revcomp(row["Index1"])], ["Index1", "Index1_rc"] + ): + for a2, _a2 in zip( + [row["Index2"], revcomp(row["Index2"])], ["Index2", "Index2_rc"] + ): + for b1, _b1 in zip( + [row_comp["Index1"], revcomp(row_comp["Index1"])], + ["Index1", "Index1_rc"], + ): + for b2, _b2 in zip( + [row_comp["Index2"], revcomp(row_comp["Index2"])], + ["Index2", "Index2_rc"], + ): + dists.append( + ( + distance(a1, b1) + distance(a2, b2), + f"{a1}-{a2} {b1}-{b2}", + f"{_a1}-{_a2} {_b1}-{_b2}", + ) + ) + min_dist = min(dists, key=lambda x: x[0]) + + if min_dist[0] <= dist_warning_threshold: + print(f"{row['SampleName']} <--> {row_comp['SampleName']}") + print( + f"Given: {row['Index1']}-{row['Index2']} <--> {row_comp['Index1']}-{row_comp['Index2']}" + ) + print(f"Distance: {min_dist[0]} when flipped to {min_dist[2]}") + print_match(*min_dist[1].split()) + print() + + +def print_match(seq1, seq2): + assert len(seq1) == len(seq2) + + m = "" + for seq1_base, seq2_base in zip(seq1, seq2): + if seq1_base == seq2_base: + m += "|" + else: + m += "X" + lines = "\n".join([seq1, m, seq2]) + print(lines) + + +def check_distances(rows: list[dict]) -> None: for i in range(len(rows)): row = rows[i] - idxs = idx_combinations(row["Index1"], row["Index2"]) for row_comp in rows[i + 1 :]: - idxs_comp = idx_combinations(row_comp["Index1"], row_comp["Index2"]) - - for idx in idxs: - for idx_comp in idxs_comp: - dist = distance(idx, idx_comp) - if dist <= warning_dist: - warning = "\n".join( - [ - f"Edit distance between {row['SampleName']} and {row_comp['SampleName']} indices is {dist}.", - f" The warning threshold is {warning_dist}.", - "Supplied indexes:", - f" {row['SampleName']}: {row['Index1']}-{row['Index2']}", - f" {row_comp['SampleName']}: {row_comp['Index1']}-{row_comp['Index2']}", - "Comparison:", - f" {row['SampleName']}: {idx}", - f" {row_comp['SampleName']}: {idx_comp}", - ] - ) - logging.warning(warning) - # TODO - print(warning) - - if any(idx in idxs_comp for idx in idxs): - raise ValueError( - "Index collision detected between" - + f" {row['SampleName']} ({row['Index1']}-{row['Index2']}) and" - + f" {row_comp['SampleName']} ({row_comp['Index1']}-{row_comp['Index2']})." - ) + check_pair_distance_new(row, row_comp) @epp_decorator(script_path=__file__, timestamp=TIMESTAMP) From 1398db9e550f43e5c4120b03f28ef3c8f71aa15e Mon Sep 17 00:00:00 2001 From: kedhammar Date: Thu, 22 Aug 2024 09:41:14 +0200 Subject: [PATCH 30/50] make rc-flips optional in index check --- scripts/generate_aviti_run_manifest.py | 125 +++++++++++-------------- 1 file changed, 56 insertions(+), 69 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 63bf6a88..f494aa26 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -167,81 +167,68 @@ def revcomp(seq: str) -> str: return seq.translate(str.maketrans("ACGT", "TGCA"))[::-1] -def check_pair_distance(row, row_comp, dist_warning_threshold: int = 2): - """Directionality-agnostic distance check between two index pairs.""" - - def get_index_combos(row): - return set( - [ - row["Index1"] + row["Index2"], - row["Index1"] + revcomp(row["Index2"]), - revcomp(row["Index1"]) + row["Index2"], - revcomp(row["Index1"]) + revcomp(row["Index2"]), - ] - ) - - row_combos = get_index_combos(row) - row_comp_combos = get_index_combos(row_comp) - - for row_combo in row_combos: - for row_comp_combo in row_comp_combos: - dist = distance(row_combo, row_comp_combo) - - if dist <= dist_warning_threshold: - warning = "\n".join( - [ - f"Edit distance between {row['SampleName']} and {row_comp['SampleName']} indices is {dist}.", - f" The warning threshold is {dist_warning_threshold}.", - "Supplied indexes:", - f" {row['SampleName']}: {row['Index1']}-{row['Index2']}", - f" {row_comp['SampleName']}: {row_comp['Index1']}-{row_comp['Index2']}", - "Comparison:", - f" {row['SampleName']}: {row_combo}", - f" {row_comp['SampleName']}: {row_comp_combo}", - ] - ) - logging.warning(warning) - if dist == 0: - raise AssertionError("Index collision detected.") - - -def check_pair_distance_new(row, row_comp, dist_warning_threshold: int = 2): - """Directionality-agnostic distance check between two index pairs.""" - dists = [] - for a1, _a1 in zip( - [row["Index1"], revcomp(row["Index1"])], ["Index1", "Index1_rc"] - ): - for a2, _a2 in zip( - [row["Index2"], revcomp(row["Index2"])], ["Index2", "Index2_rc"] +def check_pair_distance( + row, row_comp, check_flips: bool = False, dist_warning_threshold: int = 3 +): + """Distance check between two index pairs. + + row dict manifest row of sample A + row_comp dict manifest row of sample B + check_flips bool check all reverse-complement combinations + dist_warning_threshold int trigger warning for distances at or below this value + + """ + + if check_flips: + flips = [] + for a1, _a1 in zip( + [row["Index1"], revcomp(row["Index1"])], ["Index1", "Index1_rc"] ): - for b1, _b1 in zip( - [row_comp["Index1"], revcomp(row_comp["Index1"])], - ["Index1", "Index1_rc"], + for a2, _a2 in zip( + [row["Index2"], revcomp(row["Index2"])], ["Index2", "Index2_rc"] ): - for b2, _b2 in zip( - [row_comp["Index2"], revcomp(row_comp["Index2"])], - ["Index2", "Index2_rc"], + for b1, _b1 in zip( + [row_comp["Index1"], revcomp(row_comp["Index1"])], + ["Index1", "Index1_rc"], ): - dists.append( - ( - distance(a1, b1) + distance(a2, b2), - f"{a1}-{a2} {b1}-{b2}", - f"{_a1}-{_a2} {_b1}-{_b2}", + for b2, _b2 in zip( + [row_comp["Index2"], revcomp(row_comp["Index2"])], + ["Index2", "Index2_rc"], + ): + flips.append( + ( + distance(a1, b1) + distance(a2, b2), + f"{a1}-{a2} {b1}-{b2}", + f"{_a1}-{_a2} {_b1}-{_b2}", + ) ) - ) - min_dist = min(dists, key=lambda x: x[0]) + dist, compared_seqs, flip_conf = min(flips, key=lambda x: x[0]) - if min_dist[0] <= dist_warning_threshold: - print(f"{row['SampleName']} <--> {row_comp['SampleName']}") - print( - f"Given: {row['Index1']}-{row['Index2']} <--> {row_comp['Index1']}-{row_comp['Index2']}" + else: + dist = distance( + row["Index1"] + row["Index2"], row_comp["Index1"] + row_comp["Index2"] + ) + compared_seqs = ( + f"{row['Index1']}-{row['Index2']} {row_comp['Index1']}-{row_comp['Index2']}" ) - print(f"Distance: {min_dist[0]} when flipped to {min_dist[2]}") - print_match(*min_dist[1].split()) - print() + if dist <= dist_warning_threshold: + warning_lines = [ + f"Hamming distance {dist} between {row['SampleName']} and {row_comp['SampleName']}" + ] + if check_flips: + warning_lines.append( + f"Given: {row['Index1']}-{row['Index2']} <-> {row_comp['Index1']}-{row_comp['Index2']}" + ) + warning_lines.append(f"Distance: {dist} when flipped to {flip_conf}") + warning_lines.append(visualize_hamming(*compared_seqs.split())) + warning = "\n".join(warning_lines) + logging.warning(warning) + + +def visualize_hamming(seq1: str, seq2: str) -> str: + """Visualize Hamming alignment""" -def print_match(seq1, seq2): assert len(seq1) == len(seq2) m = "" @@ -252,7 +239,7 @@ def print_match(seq1, seq2): m += "X" lines = "\n".join([seq1, m, seq2]) - print(lines) + return lines def check_distances(rows: list[dict]) -> None: @@ -260,7 +247,7 @@ def check_distances(rows: list[dict]) -> None: row = rows[i] for row_comp in rows[i + 1 :]: - check_pair_distance_new(row, row_comp) + check_pair_distance(row, row_comp, dist_warning_threshold=4) @epp_decorator(script_path=__file__, timestamp=TIMESTAMP) From 628b9314cf0654ab8261c5d56416727fc745509d Mon Sep 17 00:00:00 2001 From: kedhammar Date: Thu, 22 Aug 2024 09:41:43 +0200 Subject: [PATCH 31/50] remove dataclasses --- scripts/generate_aviti_run_manifest.py | 70 -------------------------- 1 file changed, 70 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index f494aa26..68919492 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -2,7 +2,6 @@ import logging from argparse import ArgumentParser, Namespace -from dataclasses import dataclass, field from datetime import datetime as dt import pandas as pd @@ -17,75 +16,6 @@ TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S") -@dataclass -class Row: - def __init__(self, **kwargs): - for k, v in kwargs.items(): - setattr(self, k, v) - - def write(self, f): - for attr in self.__dict__.values(): - if isinstance(attr, str) and "," in attr: - f.write(f'"{attr}", ') - else: - f.write(f"{attr}, ") - f.write("\n") - - -@dataclass -class Section: - rows: list[Row] = field(default_factory=list) - - def add(self, row: Row): - self.rows.append(row) - - def write(self, f): - f.write(self.mark_start + "\n") - f.write(", ".join(self.cols) + "\n") - for row in self.rows: - row.write(f) - f.write("\n") - - -@dataclass -class RunValues(Section): - mark_start: str = "[Run Values]" - cols: list[str] = field(default_factory=lambda: ["KeyName", "Value"]) - - -@dataclass -class Settings(Section): - mark_start: str = "[Settings]" - cols: list[str] = field(default_factory=lambda: ["SettingName", "Value"]) - - -@dataclass -class Samples(Section): - mark_start: str = "[Samples]" - cols: list[str] = field( - default_factory=lambda: [ - "SampleName", - "Index1", - "Index2", - "Lane", - "Project", - "ExternalID", - ] - ) - - -@dataclass -class Manifest: - runvalues: RunValues = RunValues() - settings: Settings = Settings() - samples: Samples = Samples() - - def write(self, file_path: str): - with open(file_path, "w") as f: - for section in [self.runvalues, self.settings, self.samples]: - section.write(f) - - def get_samples_section(process: Process) -> str: """Generate the [Samples] section of the AVITI run manifest and return it as a string.""" From 6b8f5ce57083f01977e8b6493f7901998bea991e Mon Sep 17 00:00:00 2001 From: kedhammar Date: Thu, 22 Aug 2024 09:43:02 +0200 Subject: [PATCH 32/50] add hard stop for index collision --- scripts/generate_aviti_run_manifest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 68919492..8f88cc2c 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -154,6 +154,8 @@ def check_pair_distance( warning_lines.append(visualize_hamming(*compared_seqs.split())) warning = "\n".join(warning_lines) logging.warning(warning) + if dist == 0: + raise AssertionError("Identical indices detected.") def visualize_hamming(seq1: str, seq2: str) -> str: From e24ababcaf927219120a925a7df6e0a51f747ca4 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Thu, 22 Aug 2024 10:01:10 +0200 Subject: [PATCH 33/50] try fixing reqs --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8cd3ba49..70438d85 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,13 +4,13 @@ genologics google_api_python_client httplib2 interop -levenshtein Markdown numpy oauth2client pandas protobuf psycopg2 +python_levenshtein PyYAML Requests scilifelab_parsers @ git+https://github.com/SciLifeLab/scilifelab_parsers From ad183bcc4bab59cd417f38818fd1ea0939379116 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Thu, 22 Aug 2024 10:14:02 +0200 Subject: [PATCH 34/50] improve docs --- scripts/generate_aviti_run_manifest.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 8f88cc2c..69cb5d30 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -143,17 +143,26 @@ def check_pair_distance( ) if dist <= dist_warning_threshold: + # Build a warning message for the pair warning_lines = [ f"Hamming distance {dist} between {row['SampleName']} and {row_comp['SampleName']}" ] + # If the distance is derived from a flip, show the original and the flipped conformation if check_flips: warning_lines.append( f"Given: {row['Index1']}-{row['Index2']} <-> {row_comp['Index1']}-{row_comp['Index2']}" ) warning_lines.append(f"Distance: {dist} when flipped to {flip_conf}") - warning_lines.append(visualize_hamming(*compared_seqs.split())) + # If the index lengths are equal, add a simple small visual representation + if len(row["Index1"]) + len(row["Index2"]) == len(row_comp["Index1"]) + len( + row_comp["Index2"] + ): + warning_lines.append(visualize_hamming(*compared_seqs.split())) + warning = "\n".join(warning_lines) logging.warning(warning) + + # For identical collisions, kill the process if dist == 0: raise AssertionError("Identical indices detected.") From 6798734ab3449b4b5dd67b57a7bd22ff5a613680 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Thu, 22 Aug 2024 16:36:27 +0200 Subject: [PATCH 35/50] polishing --- scripts/generate_aviti_run_manifest.py | 54 ++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 69cb5d30..6003a123 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -17,7 +17,7 @@ def get_samples_section(process: Process) -> str: - """Generate the [Samples] section of the AVITI run manifest and return it as a string.""" + """Generate the [SAMPLES] section of the AVITI run manifest and return it as a string.""" # Get the analytes placed into the flowcell arts_out = [op for op in process.all_outputs() if op.type == "Analyte"] @@ -87,7 +87,7 @@ def get_samples_section(process: Process) -> str: df = pd.DataFrame(all_rows) - samples_section = f"[Samples]\n{df.to_csv(index=None, header=True)}" + samples_section = f"[SAMPLES]\n{df.to_csv(index=None, header=True)}" return samples_section @@ -153,11 +153,11 @@ def check_pair_distance( f"Given: {row['Index1']}-{row['Index2']} <-> {row_comp['Index1']}-{row_comp['Index2']}" ) warning_lines.append(f"Distance: {dist} when flipped to {flip_conf}") - # If the index lengths are equal, add a simple small visual representation + # If the index lengths are equal, add a simple visual representation if len(row["Index1"]) + len(row["Index2"]) == len(row_comp["Index1"]) + len( row_comp["Index2"] ): - warning_lines.append(visualize_hamming(*compared_seqs.split())) + warning_lines.append(show_match(*compared_seqs.split())) warning = "\n".join(warning_lines) logging.warning(warning) @@ -167,8 +167,8 @@ def check_pair_distance( raise AssertionError("Identical indices detected.") -def visualize_hamming(seq1: str, seq2: str) -> str: - """Visualize Hamming alignment""" +def show_match(seq1: str, seq2: str) -> str: + """Visualize base-by-base match between sequences of equal length.""" assert len(seq1) == len(seq2) @@ -183,12 +183,43 @@ def visualize_hamming(seq1: str, seq2: str) -> str: return lines -def check_distances(rows: list[dict]) -> None: +def check_distances(rows: list[dict], dist_warning_threshold=3) -> None: for i in range(len(rows)): row = rows[i] for row_comp in rows[i + 1 :]: - check_pair_distance(row, row_comp, dist_warning_threshold=4) + check_pair_distance( + row, row_comp, dist_warning_threshold=dist_warning_threshold + ) + + +def get_runValues_section(process: Process) -> str: + """Generate the [RUNVALUES] section of the AVITI run manifest and return it as a string.""" + + runValues_section = "\n".join( + [ + "[RUNVALUES]", + "KeyName, Value", + f"lims_step_name, {process.type.name}", + f"lims_step_id, {process.id}", + f"lims_step_operator, {process.technician.name}", + f"file_timestamp, {TIMESTAMP}", + ] + ) + + return runValues_section + + +def get_settings_section(process) -> str: + """Generate the [SETTINGS] section of the AVITI run manifest and return it as a string.""" + settings_section = "\n".join( + [ + "[SETTINGS]", + "SettingName, Value", + ] + ) + + return settings_section @epp_decorator(script_path=__file__, timestamp=TIMESTAMP) @@ -198,8 +229,15 @@ def main(args: Namespace): logging.info("Starting to build run manifest.") + runValues_section = get_runValues_section(process) + settings_section = get_settings_section(process) samples_section = get_samples_section(process) + # TODO string sanitation + manifest = "\n\n".join([runValues_section, settings_section, samples_section]) + + # TODO upload manifest to file slot + if __name__ == "__main__": # Parse args From 528db93fb1fdb6e9f421d08e5b516792c9d20578 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 23 Aug 2024 10:03:30 +0200 Subject: [PATCH 36/50] ready for testing --- scripts/generate_aviti_run_manifest.py | 85 ++++++++++++++++++++------ 1 file changed, 68 insertions(+), 17 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 6003a123..bb52bd81 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -1,6 +1,9 @@ #!/usr/bin/env python import logging +import os +import re +import shutil from argparse import ArgumentParser, Namespace from datetime import datetime as dt @@ -10,10 +13,12 @@ from genologics.lims import Lims from Levenshtein import hamming as distance +from scilifelab_epps.epp import upload_file from scilifelab_epps.wrapper import epp_decorator from scripts.generate_minknow_samplesheet import get_pool_sample_label_mapping TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S") +LABEL_SEQ_SUBSTRING = re.compile(r"[ACGT]{4,}(-[ACGT]{4,})?") def get_samples_section(process: Process) -> str: @@ -22,14 +27,16 @@ def get_samples_section(process: Process) -> str: # Get the analytes placed into the flowcell arts_out = [op for op in process.all_outputs() if op.type == "Analyte"] - # Assert that both flowcell lanes are filled - assert set([art_out.location[1].split(":")[1] for art_out in arts_out]) == set( - ["1", "2"] - ), "Expected two populated lanes." + # Check whether lanes are individually addressable + lanes_used = set([art_out.location[1].split(":")[1] for art_out in arts_out]) + ungrouped_lanes = True if len(lanes_used) == 2 else False + logging.info(f"Individually addressable lanes: {ungrouped_lanes}") # Iterate over pools all_rows = [] for art_out in arts_out: + logging.info(f"Iterating over pool '{art_out.id}'...") + lane_rows = [] assert ( art_out.container.type.name == "AVITI Flow Cell" @@ -40,6 +47,7 @@ def get_samples_section(process: Process) -> str: assert len(art_out.samples) == len( art_out.reagent_labels ), "Unequal number of samples and reagent labels." + lane: str = art_out.location[1].split(":")[1] sample2label: dict[str, str] = get_pool_sample_label_mapping(art_out) samples = art_out.samples @@ -51,19 +59,25 @@ def get_samples_section(process: Process) -> str: for sample in samples: lims_label = sample2label[sample.name] - # TODO add code here to parse reagent labels that do not only consist of sequences and dashes + # Parse sample index + label_seq_match = re.search(LABEL_SEQ_SUBSTRING, lims_label) + assert ( + label_seq_match is not None + ), f"Could not parse label sequence from {lims_label}" + label_seq = label_seq_match.group(0) - if "-" in lims_label: - index1, index2 = lims_label.split("-") + if "-" in label_seq: + index1, index2 = label_seq.split("-") else: - index1 = lims_label + index1 = label_seq index2 = "" row = {} row["SampleName"] = sample.name row["Index1"] = index1 row["Index2"] = index2 - row["Lane"] = lane + if ungrouped_lanes: + row["Lane"] = lane lane_rows.append(row) @@ -78,7 +92,8 @@ def get_samples_section(process: Process) -> str: row["SampleName"] = "PhiX" row["Index1"] = phix_idx_pair[0] row["Index2"] = phix_idx_pair[1] - row["Lane"] = lane + if ungrouped_lanes: + row["Lane"] = lane lane_rows.append(row) # Check for index collision within lane, across samples and PhiX @@ -193,16 +208,25 @@ def check_distances(rows: list[dict], dist_warning_threshold=3) -> None: ) -def get_runValues_section(process: Process) -> str: +def safe_string(s: str) -> str: + """Wrap a string in quotes if it contains commas.""" + if "," in s: + return f'"{s}"' + else: + return s + + +def get_runValues_section(process: Process, file_name: str) -> str: """Generate the [RUNVALUES] section of the AVITI run manifest and return it as a string.""" runValues_section = "\n".join( [ "[RUNVALUES]", "KeyName, Value", - f"lims_step_name, {process.type.name}", + f"lims_step_name, {safe_string(process.type.name)}", f"lims_step_id, {process.id}", f"lims_step_operator, {process.technician.name}", + f"file_name, {safe_string(file_name)}", f"file_timestamp, {TIMESTAMP}", ] ) @@ -210,7 +234,7 @@ def get_runValues_section(process: Process) -> str: return runValues_section -def get_settings_section(process) -> str: +def get_settings_section() -> str: """Generate the [SETTINGS] section of the AVITI run manifest and return it as a string.""" settings_section = "\n".join( [ @@ -227,16 +251,43 @@ def main(args: Namespace): lims = Lims(BASEURI, USERNAME, PASSWORD) process = Process(lims, id=args.pid) + file_name = ( + f"AVITI_run_manifest_{process.id}_{TIMESTAMP}_{process.technician.name}.csv" + ) + + # Build manifest logging.info("Starting to build run manifest.") - runValues_section = get_runValues_section(process) - settings_section = get_settings_section(process) + runValues_section = get_runValues_section(process, file_name) + settings_section = get_settings_section() samples_section = get_samples_section(process) - # TODO string sanitation manifest = "\n\n".join([runValues_section, settings_section, samples_section]) - # TODO upload manifest to file slot + # Write manifest + with open(file_name, "w") as f: + f.write(manifest, encoding="utf-8") + + # Upload manifest + logging.info("Uploading run manifest to LIMS...") + upload_file( + file_name, + args.file, + process, + lims, + ) + + logging.info("Moving samplesheet to ngi-nas-ns...") + try: + shutil.copyfile( + file_name, + f"/srv/ngi-nas-ns/samplesheets/AVITI/{dt.now().year}/{file_name}", + ) + os.remove(file_name) + except: + logging.error("Failed to move samplesheet to ngi-nas-ns.", exc_info=True) + else: + logging.info("Samplesheet moved to ngi-nas-ns.") if __name__ == "__main__": From 20036f898c999bce22b456282d2ba36738167dda Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 23 Aug 2024 10:05:06 +0200 Subject: [PATCH 37/50] fix name --- scripts/generate_aviti_run_manifest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index bb52bd81..99c54278 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -277,7 +277,7 @@ def main(args: Namespace): lims, ) - logging.info("Moving samplesheet to ngi-nas-ns...") + logging.info("Moving run manifest to ngi-nas-ns...") try: shutil.copyfile( file_name, @@ -285,9 +285,9 @@ def main(args: Namespace): ) os.remove(file_name) except: - logging.error("Failed to move samplesheet to ngi-nas-ns.", exc_info=True) + logging.error("Failed to move run manifest to ngi-nas-ns.", exc_info=True) else: - logging.info("Samplesheet moved to ngi-nas-ns.") + logging.info("Run manifest moved to ngi-nas-ns.") if __name__ == "__main__": From eafe4db0408dfe505e757f79c45b20ec61867e7e Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 23 Aug 2024 10:08:17 +0200 Subject: [PATCH 38/50] move blocks --- scripts/generate_aviti_run_manifest.py | 89 +++++++++++++------------- 1 file changed, 46 insertions(+), 43 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 99c54278..541f2b69 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -21,6 +21,38 @@ LABEL_SEQ_SUBSTRING = re.compile(r"[ACGT]{4,}(-[ACGT]{4,})?") +def get_runValues_section(process: Process, file_name: str) -> str: + """Generate the [RUNVALUES] section of the AVITI run manifest and return it as a string.""" + + # TODO master step fields for read recipe? + + runValues_section = "\n".join( + [ + "[RUNVALUES]", + "KeyName, Value", + f"lims_step_name, {safe_string(process.type.name)}", + f"lims_step_id, {process.id}", + f"lims_step_operator, {process.technician.name}", + f"file_name, {safe_string(file_name)}", + f"file_timestamp, {TIMESTAMP}", + ] + ) + + return runValues_section + + +def get_settings_section() -> str: + """Generate the [SETTINGS] section of the AVITI run manifest and return it as a string.""" + settings_section = "\n".join( + [ + "[SETTINGS]", + "SettingName, Value", + ] + ) + + return settings_section + + def get_samples_section(process: Process) -> str: """Generate the [SAMPLES] section of the AVITI run manifest and return it as a string.""" @@ -82,6 +114,7 @@ def get_samples_section(process: Process) -> str: lane_rows.append(row) # Add PhiX controls + # TODO read from master step field for phix_idx_pair in [ ("ACGTGTAGC", "GCTAGTGCA"), ("CACATGCTG", "AGACACTGT"), @@ -107,9 +140,14 @@ def get_samples_section(process: Process) -> str: return samples_section -def revcomp(seq: str) -> str: - """Reverse-complement a DNA string.""" - return seq.translate(str.maketrans("ACGT", "TGCA"))[::-1] +def check_distances(rows: list[dict], dist_warning_threshold=3) -> None: + for i in range(len(rows)): + row = rows[i] + + for row_comp in rows[i + 1 :]: + check_pair_distance( + row, row_comp, dist_warning_threshold=dist_warning_threshold + ) def check_pair_distance( @@ -182,6 +220,11 @@ def check_pair_distance( raise AssertionError("Identical indices detected.") +def revcomp(seq: str) -> str: + """Reverse-complement a DNA string.""" + return seq.translate(str.maketrans("ACGT", "TGCA"))[::-1] + + def show_match(seq1: str, seq2: str) -> str: """Visualize base-by-base match between sequences of equal length.""" @@ -198,16 +241,6 @@ def show_match(seq1: str, seq2: str) -> str: return lines -def check_distances(rows: list[dict], dist_warning_threshold=3) -> None: - for i in range(len(rows)): - row = rows[i] - - for row_comp in rows[i + 1 :]: - check_pair_distance( - row, row_comp, dist_warning_threshold=dist_warning_threshold - ) - - def safe_string(s: str) -> str: """Wrap a string in quotes if it contains commas.""" if "," in s: @@ -216,36 +249,6 @@ def safe_string(s: str) -> str: return s -def get_runValues_section(process: Process, file_name: str) -> str: - """Generate the [RUNVALUES] section of the AVITI run manifest and return it as a string.""" - - runValues_section = "\n".join( - [ - "[RUNVALUES]", - "KeyName, Value", - f"lims_step_name, {safe_string(process.type.name)}", - f"lims_step_id, {process.id}", - f"lims_step_operator, {process.technician.name}", - f"file_name, {safe_string(file_name)}", - f"file_timestamp, {TIMESTAMP}", - ] - ) - - return runValues_section - - -def get_settings_section() -> str: - """Generate the [SETTINGS] section of the AVITI run manifest and return it as a string.""" - settings_section = "\n".join( - [ - "[SETTINGS]", - "SettingName, Value", - ] - ) - - return settings_section - - @epp_decorator(script_path=__file__, timestamp=TIMESTAMP) def main(args: Namespace): lims = Lims(BASEURI, USERNAME, PASSWORD) From 6742781929f7711c50a4ae844849ad5a17f37c15 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 23 Aug 2024 10:11:22 +0200 Subject: [PATCH 39/50] remove kw arg --- scripts/generate_aviti_run_manifest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 541f2b69..3d0cd088 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -269,7 +269,7 @@ def main(args: Namespace): # Write manifest with open(file_name, "w") as f: - f.write(manifest, encoding="utf-8") + f.write(manifest) # Upload manifest logging.info("Uploading run manifest to LIMS...") From c6d47f3f8af58fe79ee0d00f2014e024173c7e75 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 23 Aug 2024 10:16:29 +0200 Subject: [PATCH 40/50] get rid of space --- scripts/generate_aviti_run_manifest.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 3d0cd088..1d8adc23 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -254,9 +254,7 @@ def main(args: Namespace): lims = Lims(BASEURI, USERNAME, PASSWORD) process = Process(lims, id=args.pid) - file_name = ( - f"AVITI_run_manifest_{process.id}_{TIMESTAMP}_{process.technician.name}.csv" - ) + file_name = f"AVITI_run_manifest_{process.id}_{TIMESTAMP}_{process.technician.name.replace(' ','')}.csv" # Build manifest logging.info("Starting to build run manifest.") From f044e7fa40afaa03ec3252c3e3555a99e1aba369 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 23 Aug 2024 10:18:02 +0200 Subject: [PATCH 41/50] remove superfluous log --- scripts/generate_aviti_run_manifest.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 1d8adc23..2ca70f7f 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -67,8 +67,6 @@ def get_samples_section(process: Process) -> str: # Iterate over pools all_rows = [] for art_out in arts_out: - logging.info(f"Iterating over pool '{art_out.id}'...") - lane_rows = [] assert ( art_out.container.type.name == "AVITI Flow Cell" From ee1ad8c2dffc266755cb2505eb70cd94ea58eb56 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 23 Aug 2024 10:21:36 +0200 Subject: [PATCH 42/50] make assertion more lenient --- scripts/generate_aviti_run_manifest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 2ca70f7f..ca287787 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -69,8 +69,8 @@ def get_samples_section(process: Process) -> str: for art_out in arts_out: lane_rows = [] assert ( - art_out.container.type.name == "AVITI Flow Cell" - ), "Unsupported container type." + "AVITI Flow Cell" in art_out.container.type.name + ), f"Unsupported container type {art_out.container.type.name}." assert ( len(art_out.samples) > 1 and len(art_out.reagent_labels) > 1 ), "Not a pool." From 3a53112faed1711cac354a40656e40d2b71c59f2 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 23 Aug 2024 11:00:55 +0200 Subject: [PATCH 43/50] make adding phix to manifest conditional on udf, dump run recipe in samplesheet --- scripts/generate_aviti_run_manifest.py | 40 ++++++++++++++++---------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index ca287787..1b18e51c 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -24,7 +24,14 @@ def get_runValues_section(process: Process, file_name: str) -> str: """Generate the [RUNVALUES] section of the AVITI run manifest and return it as a string.""" - # TODO master step fields for read recipe? + read_recipe = "-".join( + [ + str(process.udf.get("Read 1 Cycles", 0)), + str(process.udf.get("Index Read 1", 0)), + str(process.udf.get("Index Read 2", 0)), + str(process.udf.get("Read 2 Cycles", 0)), + ] + ) runValues_section = "\n".join( [ @@ -35,6 +42,7 @@ def get_runValues_section(process: Process, file_name: str) -> str: f"lims_step_operator, {process.technician.name}", f"file_name, {safe_string(file_name)}", f"file_timestamp, {TIMESTAMP}", + f"read_recipe, {read_recipe}", ] ) @@ -56,6 +64,8 @@ def get_settings_section() -> str: def get_samples_section(process: Process) -> str: """Generate the [SAMPLES] section of the AVITI run manifest and return it as a string.""" + phix_loaded: bool = process.udf["PhiX Loaded"] + # Get the analytes placed into the flowcell arts_out = [op for op in process.all_outputs() if op.type == "Analyte"] @@ -112,20 +122,20 @@ def get_samples_section(process: Process) -> str: lane_rows.append(row) # Add PhiX controls - # TODO read from master step field - for phix_idx_pair in [ - ("ACGTGTAGC", "GCTAGTGCA"), - ("CACATGCTG", "AGACACTGT"), - ("GTACACGAT", "CTCGTACAG"), - ("TGTGCATCA", "TAGTCGATC"), - ]: - row = {} - row["SampleName"] = "PhiX" - row["Index1"] = phix_idx_pair[0] - row["Index2"] = phix_idx_pair[1] - if ungrouped_lanes: - row["Lane"] = lane - lane_rows.append(row) + if phix_loaded: + for phix_idx_pair in [ + ("ACGTGTAGC", "GCTAGTGCA"), + ("CACATGCTG", "AGACACTGT"), + ("GTACACGAT", "CTCGTACAG"), + ("TGTGCATCA", "TAGTCGATC"), + ]: + row = {} + row["SampleName"] = "PhiX" + row["Index1"] = phix_idx_pair[0] + row["Index2"] = phix_idx_pair[1] + if ungrouped_lanes: + row["Lane"] = lane + lane_rows.append(row) # Check for index collision within lane, across samples and PhiX check_distances(lane_rows) From 8f82db1175451759fb3352d36bb8eec1ae7b05d1 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 23 Aug 2024 11:04:40 +0200 Subject: [PATCH 44/50] correct path --- scripts/generate_aviti_run_manifest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 1b18e51c..b7c7e455 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -290,7 +290,7 @@ def main(args: Namespace): try: shutil.copyfile( file_name, - f"/srv/ngi-nas-ns/samplesheets/AVITI/{dt.now().year}/{file_name}", + f"/srv/ngi-nas-ns/samplesheets/Aviti/{dt.now().year}/{file_name}", ) os.remove(file_name) except: From 52664a9adbf38caea62a9221d5f47176d0807436 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 27 Aug 2024 11:04:17 +0200 Subject: [PATCH 45/50] always explicate lanes in [SAMPLES] section --- scripts/generate_aviti_run_manifest.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index b7c7e455..3d6dd2e2 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -68,15 +68,16 @@ def get_samples_section(process: Process) -> str: # Get the analytes placed into the flowcell arts_out = [op for op in process.all_outputs() if op.type == "Analyte"] + lanes = [art_out.location[1].split(":")[1] for art_out in arts_out] - # Check whether lanes are individually addressable - lanes_used = set([art_out.location[1].split(":")[1] for art_out in arts_out]) - ungrouped_lanes = True if len(lanes_used) == 2 else False - logging.info(f"Individually addressable lanes: {ungrouped_lanes}") + # If only a single pool is added to the LIMS container, treat it as though it was loaded into both lanes + if len(lanes) == 1: + lanes.append("2" if lanes[0] == "1" else "1") + arts_out.append(arts_out[0]) # Iterate over pools all_rows = [] - for art_out in arts_out: + for art_out, lane in zip(arts_out, lanes): lane_rows = [] assert ( "AVITI Flow Cell" in art_out.container.type.name @@ -88,7 +89,6 @@ def get_samples_section(process: Process) -> str: art_out.reagent_labels ), "Unequal number of samples and reagent labels." - lane: str = art_out.location[1].split(":")[1] sample2label: dict[str, str] = get_pool_sample_label_mapping(art_out) samples = art_out.samples labels = art_out.reagent_labels @@ -116,8 +116,7 @@ def get_samples_section(process: Process) -> str: row["SampleName"] = sample.name row["Index1"] = index1 row["Index2"] = index2 - if ungrouped_lanes: - row["Lane"] = lane + row["Lane"] = lane lane_rows.append(row) @@ -133,8 +132,7 @@ def get_samples_section(process: Process) -> str: row["SampleName"] = "PhiX" row["Index1"] = phix_idx_pair[0] row["Index2"] = phix_idx_pair[1] - if ungrouped_lanes: - row["Lane"] = lane + row["Lane"] = lane lane_rows.append(row) # Check for index collision within lane, across samples and PhiX From 5b0ca7b022e265f5aadf23f0c1321986658fd707 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 27 Aug 2024 11:09:23 +0200 Subject: [PATCH 46/50] sort samples section by name and lane --- scripts/generate_aviti_run_manifest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 3d6dd2e2..16991ae5 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -140,6 +140,7 @@ def get_samples_section(process: Process) -> str: all_rows.extend(lane_rows) df = pd.DataFrame(all_rows) + df.sort_values(by=["SampleName", "Lane"], inplace=True) samples_section = f"[SAMPLES]\n{df.to_csv(index=None, header=True)}" From 5ac1299c8d167ad4498be14fbc617477f9b48bc0 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 27 Aug 2024 11:12:36 +0200 Subject: [PATCH 47/50] Revert "sort samples section by name and lane" This reverts commit 5b0ca7b022e265f5aadf23f0c1321986658fd707. --- scripts/generate_aviti_run_manifest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 16991ae5..3d6dd2e2 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -140,7 +140,6 @@ def get_samples_section(process: Process) -> str: all_rows.extend(lane_rows) df = pd.DataFrame(all_rows) - df.sort_values(by=["SampleName", "Lane"], inplace=True) samples_section = f"[SAMPLES]\n{df.to_csv(index=None, header=True)}" From 0ecb673aded3c708693a604121cbef902d9498bc Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 27 Aug 2024 13:35:43 +0200 Subject: [PATCH 48/50] adapt to 2-lane container --- scripts/generate_aviti_run_manifest.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 3d6dd2e2..827f6f5d 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -66,14 +66,11 @@ def get_samples_section(process: Process) -> str: phix_loaded: bool = process.udf["PhiX Loaded"] - # Get the analytes placed into the flowcell + # Assert two output analytes placed in either flowcell lane arts_out = [op for op in process.all_outputs() if op.type == "Analyte"] + assert len(arts_out) == 2, "Expected two output analytes." lanes = [art_out.location[1].split(":")[1] for art_out in arts_out] - - # If only a single pool is added to the LIMS container, treat it as though it was loaded into both lanes - if len(lanes) == 1: - lanes.append("2" if lanes[0] == "1" else "1") - arts_out.append(arts_out[0]) + assert set(lanes) == {"1", "2"}, "Expected lanes 1 and 2." # Iterate over pools all_rows = [] From 0cbddfc60c54a6f85b6e5b2e6483a12c1394f275 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 27 Aug 2024 13:45:37 +0200 Subject: [PATCH 49/50] Include flowcell ID, rename sanitation func and trim redundant metadata --- scripts/generate_aviti_run_manifest.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 827f6f5d..eee58152 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -21,6 +21,16 @@ LABEL_SEQ_SUBSTRING = re.compile(r"[ACGT]{4,}(-[ACGT]{4,})?") +def get_flowcell_id(process: Process) -> str: + flowcell_ids = [ + op.container.name for op in process.all_outputs() if op.type == "Analyte" + ] + + assert len(set(flowcell_ids)) == 1, "Expected one flowcell ID." + + return flowcell_ids[0] + + def get_runValues_section(process: Process, file_name: str) -> str: """Generate the [RUNVALUES] section of the AVITI run manifest and return it as a string.""" @@ -37,11 +47,8 @@ def get_runValues_section(process: Process, file_name: str) -> str: [ "[RUNVALUES]", "KeyName, Value", - f"lims_step_name, {safe_string(process.type.name)}", - f"lims_step_id, {process.id}", - f"lims_step_operator, {process.technician.name}", - f"file_name, {safe_string(file_name)}", - f"file_timestamp, {TIMESTAMP}", + f"lims_step_name, {sanitize(process.type.name)}", + f"file_name, {sanitize(file_name)}", f"read_recipe, {read_recipe}", ] ) @@ -244,7 +251,7 @@ def show_match(seq1: str, seq2: str) -> str: return lines -def safe_string(s: str) -> str: +def sanitize(s: str) -> str: """Wrap a string in quotes if it contains commas.""" if "," in s: return f'"{s}"' @@ -257,7 +264,9 @@ def main(args: Namespace): lims = Lims(BASEURI, USERNAME, PASSWORD) process = Process(lims, id=args.pid) - file_name = f"AVITI_run_manifest_{process.id}_{TIMESTAMP}_{process.technician.name.replace(' ','')}.csv" + # Name manifest file + flowcell_id = get_flowcell_id(process) + file_name = f"AVITI_run_manifest_{flowcell_id}_{process.id}_{TIMESTAMP}_{process.technician.name.replace(' ','')}.csv" # Build manifest logging.info("Starting to build run manifest.") From 20f6dbd1e02a5b823f0849e851d16c4904699f2a Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 27 Aug 2024 13:53:33 +0200 Subject: [PATCH 50/50] add warning --- scripts/generate_aviti_run_manifest.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index eee58152..2cae17f3 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -27,8 +27,14 @@ def get_flowcell_id(process: Process) -> str: ] assert len(set(flowcell_ids)) == 1, "Expected one flowcell ID." + flowcell_id = flowcell_ids[0] - return flowcell_ids[0] + if "-" in flowcell_id: + logging.warning( + f"Container name {flowcell_id} contains a dash, did you forget to set the name of the LIMS container to the flowcell ID?" + ) + + return flowcell_id def get_runValues_section(process: Process, file_name: str) -> str: