From 494ff3533d3bf25d8412ee4028fbf931a2841d4f Mon Sep 17 00:00:00 2001 From: chuan-wang Date: Fri, 23 Aug 2024 10:45:25 +0200 Subject: [PATCH 1/4] Support AVITI in the BCL conversion step --- VERSIONLOG.md | 4 + scripts/aviti_run_parameter_parser.py | 2 +- scripts/manage_demux_stats.py | 108 ++++++++++++++++++++++- scripts/manage_demux_stats_thresholds.py | 15 ++++ 4 files changed, 127 insertions(+), 2 deletions(-) diff --git a/VERSIONLOG.md b/VERSIONLOG.md index 14618fa8..894cfc5b 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,5 +1,9 @@ # Scilifelab_epps Version Log +## 20240823.1 + +Support AVITI in the BCL conversion step + ## 20240822.1 New EPP scripts for parsing AVITI run parameters and stats diff --git a/scripts/aviti_run_parameter_parser.py b/scripts/aviti_run_parameter_parser.py index 1f4a06fd..d914c328 100644 --- a/scripts/aviti_run_parameter_parser.py +++ b/scripts/aviti_run_parameter_parser.py @@ -83,7 +83,7 @@ def set_step_udfs(process, run_dir): process.udf["Index Read 2"] = run_parameters["Cycles"].get("I2", 0) process.udf["Read Order"] = run_parameters.get("ReadOrder") - process.udf["Throughut Selection"] = run_parameters.get("ThroughputSelection") + process.udf["Throughput Selection"] = run_parameters.get("ThroughputSelection") process.udf["Kit Configuration"] = run_parameters.get("KitConfiguration") process.udf["Preparation Workflow"] = run_parameters.get("PreparationWorkflow") process.udf["Chemistry Version"] = run_parameters.get("ChemistryVersion") diff --git a/scripts/manage_demux_stats.py b/scripts/manage_demux_stats.py index 2ea549d8..883fc3f8 100644 --- a/scripts/manage_demux_stats.py +++ b/scripts/manage_demux_stats.py @@ -53,6 +53,7 @@ def get_process_stats(demux_process): "AUTOMATED - NovaSeq Run (NovaSeq 6000 v2.0)", "Illumina Sequencing (NextSeq) v1.0", "NovaSeqXPlus Run v1.0", + "AVITI Run v1.0" } try: # Query LIMS for all steps containing the first input artifact of this step and match to the set of sequencing steps @@ -121,6 +122,21 @@ def get_process_stats(demux_process): ) proc_stats["Paired"] = True if seq_process.udf.get("Read 2 Cycles") else False + elif "AVITI Run" in seq_process.type.name: + try: + proc_stats["Chemistry"] = "AVITI" + seq_process.udf["Throughput Selection"] + except Exception as e: + problem_handler( + "exit", f"No flowcell version set in sequencing step: {str(e)}" + ) + proc_stats["Instrument"] = "AVITI" + proc_stats["Read Length"] = ( + max(seq_process.udf["Read 1 Cycles"], seq_process.udf["Read 2 Cycles"]) + if seq_process.udf.get("Read 2 Cycles") + else seq_process.udf["Read 1 Cycles"] + ) + proc_stats["Paired"] = True if seq_process.udf.get("Read 2 Cycles") else False + else: problem_handler("exit", "Unhandled workflow step (run type)") logger.info("Run type/chemistry set to {}".format(proc_stats["Chemistry"])) @@ -248,6 +264,7 @@ def set_sample_values(demux_process, parser_struct, process_stats): "AUTOMATED - NovaSeq Run (NovaSeq 6000 v2.0)", "Illumina Sequencing (NextSeq) v1.0", "NovaSeqXPlus Run v1.0", + "AVITI Run v1.0" } seq_process = lims.get_processes( inputartifactlimsid=demux_process.all_inputs()[0].id, type=seq_processes @@ -443,6 +460,7 @@ def set_sample_values(demux_process, parser_struct, process_stats): "AUTOMATED - NovaSeq Run (NovaSeq 6000 v2.0)", "Illumina Sequencing (NextSeq) v1.0", "NovaSeqXPlus Run v1.0", + "AVITI Run v1.0" ]: try: for inp in seq_process.all_outputs(): @@ -784,6 +802,91 @@ def write_demuxfile(process_stats, demux_id): return laneBC.sample_data +def write_demuxfile_aviti(process_stats, demux_id): + """Creates demux_{FCID}.csv and attaches it to process""" + # Includes windows drive letter support + + metadata_dir_name = "ngi-nas-ns" + instrument_dir_name = "{}_data".format(process_stats["Instrument"]) + + lanebc_path = os.path.join( + os.sep, + "srv", + metadata_dir_name, + instrument_dir_name, + process_stats["Run ID"], + "IndexAssignment.csv", + ) + + try: + laneBC = {} + laneBC['sample_data'] = [] + with open(lanebc_path, 'r') as lanebc_file: + reader = csv.DictReader(lanebc_file) + for row in reader: + if '+' not in row['Lane']: + index = row.get('I1', '') + if row.get('I2'): + index += '-' + index += row['I2'] + laneBC['sample_data'].append( + { + 'Lane' : row.get('Lane', ''), + 'Sample' : row.get('SampleName', ''), + 'Project' : row.get('Project', ''), + 'Barcode sequence' : index, + 'PF Clusters' : row.get('NumPoloniesAssigned', '0'), + '% of thelane' : row.get('PercentPoloniesAssigned', '0'), + 'Yield (Mbases)' : str(float(row.get('Yield(Gb)', '0'))*1000) + } + ) + except Exception as e: + problem_handler( + "exit", + f"Unable to fetch IndexAssignment.csv from {lanebc_path}: {str(e)}", + ) + + fname = "{}_demuxstats_{}.csv".format(demux_id, process_stats["Flow Cell ID"]) + + # Writes less undetermined info than undemultiplex_index.py. May cause problems downstreams + with open(fname, "w") as csvfile: + writer = csv.writer(csvfile) + writer.writerow( + [ + "Project", + "Sample ID", + "Lane", + "# Reads", + "Index" + ] + ) + for entry in laneBC['sample_data']: + + reads = entry["PF Clusters"] + + if process_stats["Paired"]: + reads = int(reads.replace(",", "")) * 2 + else: + reads = int(reads.replace(",", "")) + + try: + writer.writerow( + [ + entry["Project"], + entry["Sample"], + entry["Lane"], + reads, + entry["Barcode sequence"] + ] + ) + except Exception as e: + problem_handler( + "exit", + f"Flowcell parser is unable to fetch all necessary fields for demux file: {str(e)}", + ) + return laneBC['sample_data'] + + def main(process_lims_id, demux_id, log_id): # Sets up logger basic_name = f"{log_id}_logfile.txt" @@ -813,7 +916,10 @@ def main(process_lims_id, demux_id, log_id): fill_process_fields(demux_process, process_stats) # Create the demux output file - parser_struct = write_demuxfile(process_stats, demux_id) + if "AVITI" in demux_process.process.type.name: + parser_struct = write_demuxfile_aviti(process_stats, demux_id) + else: + parser_struct = write_demuxfile(process_stats, demux_id) # Alters artifacts set_sample_values(demux_process, parser_struct, process_stats) diff --git a/scripts/manage_demux_stats_thresholds.py b/scripts/manage_demux_stats_thresholds.py index e4fcdb03..2fd562fe 100644 --- a/scripts/manage_demux_stats_thresholds.py +++ b/scripts/manage_demux_stats_thresholds.py @@ -18,6 +18,7 @@ def __init__(self, instrument, chemistry, paired, read_length): "NovaSeq", "NextSeq", "NovaSeqXPlus", + "AVITI" ] self.valid_chemistry = [ "MiSeq", @@ -36,6 +37,7 @@ def __init__(self, instrument, chemistry, paired, read_length): "10B", "1.5B", "25B", + "AVITI High" ] if ( @@ -105,6 +107,15 @@ def set_Q30(self): elif self.read_length < 100: self.Q30 = 85 + # Preliminary values for AVITI + elif self.instrument == "AVITI": + if self.read_length >= 150: + self.Q30 = 85 + elif self.read_length >= 100: + self.Q30 = 90 + elif self.read_length < 100: + self.Q30 = 95 + if not self.Q30: self.problem_handler( "exit", @@ -152,6 +163,10 @@ def set_exp_lane_clust(self): self.exp_lane_clust = 400e6 elif self.chemistry == "NextSeq 2000 P3": self.exp_lane_clust = 550e6 + # Preliminary values for AVITI + elif self.instrument == "AVITI": + if self.chemistry == "AVITI High": + self.exp_lane_clust = 100e6 else: self.problem_handler("exit", "Unknown run type!") if not self.exp_lane_clust: From 63be04fa6f869deace4aa1f9171bfaa32bd9a9e8 Mon Sep 17 00:00:00 2001 From: chuan-wang Date: Fri, 23 Aug 2024 10:46:32 +0200 Subject: [PATCH 2/4] Fix bug --- scripts/manage_demux_stats_thresholds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/manage_demux_stats_thresholds.py b/scripts/manage_demux_stats_thresholds.py index 2fd562fe..17769b8d 100644 --- a/scripts/manage_demux_stats_thresholds.py +++ b/scripts/manage_demux_stats_thresholds.py @@ -108,7 +108,7 @@ def set_Q30(self): self.Q30 = 85 # Preliminary values for AVITI - elif self.instrument == "AVITI": + elif self.instrument == "AVITI": if self.read_length >= 150: self.Q30 = 85 elif self.read_length >= 100: From a3bc565d4ab07ad2182ac52795115c61b1fa570f Mon Sep 17 00:00:00 2001 From: chuan-wang Date: Fri, 23 Aug 2024 10:47:03 +0200 Subject: [PATCH 3/4] ruff format --- scripts/manage_demux_stats.py | 55 +++++++++++------------- scripts/manage_demux_stats_thresholds.py | 4 +- 2 files changed, 26 insertions(+), 33 deletions(-) diff --git a/scripts/manage_demux_stats.py b/scripts/manage_demux_stats.py index 883fc3f8..bab361c4 100644 --- a/scripts/manage_demux_stats.py +++ b/scripts/manage_demux_stats.py @@ -53,7 +53,7 @@ def get_process_stats(demux_process): "AUTOMATED - NovaSeq Run (NovaSeq 6000 v2.0)", "Illumina Sequencing (NextSeq) v1.0", "NovaSeqXPlus Run v1.0", - "AVITI Run v1.0" + "AVITI Run v1.0", } try: # Query LIMS for all steps containing the first input artifact of this step and match to the set of sequencing steps @@ -264,7 +264,7 @@ def set_sample_values(demux_process, parser_struct, process_stats): "AUTOMATED - NovaSeq Run (NovaSeq 6000 v2.0)", "Illumina Sequencing (NextSeq) v1.0", "NovaSeqXPlus Run v1.0", - "AVITI Run v1.0" + "AVITI Run v1.0", } seq_process = lims.get_processes( inputartifactlimsid=demux_process.all_inputs()[0].id, type=seq_processes @@ -460,7 +460,7 @@ def set_sample_values(demux_process, parser_struct, process_stats): "AUTOMATED - NovaSeq Run (NovaSeq 6000 v2.0)", "Illumina Sequencing (NextSeq) v1.0", "NovaSeqXPlus Run v1.0", - "AVITI Run v1.0" + "AVITI Run v1.0", ]: try: for inp in seq_process.all_outputs(): @@ -820,24 +820,26 @@ def write_demuxfile_aviti(process_stats, demux_id): try: laneBC = {} - laneBC['sample_data'] = [] - with open(lanebc_path, 'r') as lanebc_file: + laneBC["sample_data"] = [] + with open(lanebc_path, "r") as lanebc_file: reader = csv.DictReader(lanebc_file) for row in reader: - if '+' not in row['Lane']: - index = row.get('I1', '') - if row.get('I2'): - index += '-' - index += row['I2'] - laneBC['sample_data'].append( + if "+" not in row["Lane"]: + index = row.get("I1", "") + if row.get("I2"): + index += "-" + index += row["I2"] + laneBC["sample_data"].append( { - 'Lane' : row.get('Lane', ''), - 'Sample' : row.get('SampleName', ''), - 'Project' : row.get('Project', ''), - 'Barcode sequence' : index, - 'PF Clusters' : row.get('NumPoloniesAssigned', '0'), - '% of thelane' : row.get('PercentPoloniesAssigned', '0'), - 'Yield (Mbases)' : str(float(row.get('Yield(Gb)', '0'))*1000) + "Lane": row.get("Lane", ""), + "Sample": row.get("SampleName", ""), + "Project": row.get("Project", ""), + "Barcode sequence": index, + "PF Clusters": row.get("NumPoloniesAssigned", "0"), + "% of thelane": row.get("PercentPoloniesAssigned", "0"), + "Yield (Mbases)": str( + float(row.get("Yield(Gb)", "0")) * 1000 + ), } ) except Exception as e: @@ -851,17 +853,8 @@ def write_demuxfile_aviti(process_stats, demux_id): # Writes less undetermined info than undemultiplex_index.py. May cause problems downstreams with open(fname, "w") as csvfile: writer = csv.writer(csvfile) - writer.writerow( - [ - "Project", - "Sample ID", - "Lane", - "# Reads", - "Index" - ] - ) - for entry in laneBC['sample_data']: - + writer.writerow(["Project", "Sample ID", "Lane", "# Reads", "Index"]) + for entry in laneBC["sample_data"]: reads = entry["PF Clusters"] if process_stats["Paired"]: @@ -876,7 +869,7 @@ def write_demuxfile_aviti(process_stats, demux_id): entry["Sample"], entry["Lane"], reads, - entry["Barcode sequence"] + entry["Barcode sequence"], ] ) except Exception as e: @@ -884,7 +877,7 @@ def write_demuxfile_aviti(process_stats, demux_id): "exit", f"Flowcell parser is unable to fetch all necessary fields for demux file: {str(e)}", ) - return laneBC['sample_data'] + return laneBC["sample_data"] def main(process_lims_id, demux_id, log_id): diff --git a/scripts/manage_demux_stats_thresholds.py b/scripts/manage_demux_stats_thresholds.py index 17769b8d..977db5da 100644 --- a/scripts/manage_demux_stats_thresholds.py +++ b/scripts/manage_demux_stats_thresholds.py @@ -18,7 +18,7 @@ def __init__(self, instrument, chemistry, paired, read_length): "NovaSeq", "NextSeq", "NovaSeqXPlus", - "AVITI" + "AVITI", ] self.valid_chemistry = [ "MiSeq", @@ -37,7 +37,7 @@ def __init__(self, instrument, chemistry, paired, read_length): "10B", "1.5B", "25B", - "AVITI High" + "AVITI High", ] if ( From 4a443b4aa246e558106c38b2a0872f37d1261265 Mon Sep 17 00:00:00 2001 From: chuan-wang Date: Fri, 23 Aug 2024 11:40:56 +0200 Subject: [PATCH 4/4] ruff fix --- scripts/manage_demux_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/manage_demux_stats.py b/scripts/manage_demux_stats.py index bab361c4..cd5e8828 100644 --- a/scripts/manage_demux_stats.py +++ b/scripts/manage_demux_stats.py @@ -821,7 +821,7 @@ def write_demuxfile_aviti(process_stats, demux_id): try: laneBC = {} laneBC["sample_data"] = [] - with open(lanebc_path, "r") as lanebc_file: + with open(lanebc_path) as lanebc_file: reader = csv.DictReader(lanebc_file) for row in reader: if "+" not in row["Lane"]: