From 106668e188a7d223c542d3ab2d3e18adcc9fe1e4 Mon Sep 17 00:00:00 2001 From: chuan-wang Date: Fri, 23 Aug 2024 15:21:30 +0200 Subject: [PATCH 1/3] Add function to fetch sample-level Q30 for AVITI --- VERSIONLOG.md | 4 +++ scripts/manage_demux_stats.py | 68 ++++++++++++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/VERSIONLOG.md b/VERSIONLOG.md index 894cfc5b..46cbf2df 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,5 +1,9 @@ # Scilifelab_epps Version Log +## 20240823.2 + +Add function to fetch sample-level Q30 for AVITI + ## 20240823.1 Support AVITI in the BCL conversion step diff --git a/scripts/manage_demux_stats.py b/scripts/manage_demux_stats.py index cd5e8828..2dc1c3cd 100644 --- a/scripts/manage_demux_stats.py +++ b/scripts/manage_demux_stats.py @@ -12,6 +12,7 @@ Written by Isak Sylvin; isak.sylvin@scilifelab.se""" import csv +import json import logging import os import re @@ -802,12 +803,50 @@ def write_demuxfile(process_stats, demux_id): return laneBC.sample_data +def fetch_project_sample_stats(projects_path): + unassigned_dir_name = "Unassigned" + + project_sample_stats = {} + + if os.path.exists(projects_path): + projects = os.listdir(projects_path) + if unassigned_dir_name in projects: + projects.remove(unassigned_dir_name) + for project in projects: + stats_json_path = os.path.join( + projects_path, project, f"{project}_RunStats.json" + ) + if os.path.exists(stats_json_path): + with open(stats_json_path) as stats_json: + project_sample_stats_raw = json.load(stats_json) + for sample_stats in project_sample_stats_raw["SampleStats"]: + sample_name = sample_stats["SampleName"] + percent_q30 = sample_stats["PercentQ30"] + quality_score_mean = sample_stats["QualityScoreMean"] + percent_mismatch = sample_stats["PercentMismatch"] + sample_yield = sample_stats["Yield"] + project_sample_stats[sample_name] = { + "percent_q30": percent_q30, + "quality_score_mean": quality_score_mean, + "percent_mismatch": percent_mismatch, + "sample_yield": sample_yield, + "project": project, + } + return project_sample_stats + else: + problem_handler( + "exit", + "The Samples folder is missing for fetching Q30 values", + ) + + def write_demuxfile_aviti(process_stats, demux_id): """Creates demux_{FCID}.csv and attaches it to process""" # Includes windows drive letter support metadata_dir_name = "ngi-nas-ns" instrument_dir_name = "{}_data".format(process_stats["Instrument"]) + sample_dir_name = "Samples" lanebc_path = os.path.join( os.sep, @@ -818,6 +857,17 @@ def write_demuxfile_aviti(process_stats, demux_id): "IndexAssignment.csv", ) + projects_path = os.path.join( + os.sep, + "srv", + metadata_dir_name, + instrument_dir_name, + process_stats["Run ID"], + sample_dir_name, + ) + + project_sample_stats = fetch_project_sample_stats(projects_path) + try: laneBC = {} laneBC["sample_data"] = [] @@ -829,6 +879,11 @@ def write_demuxfile_aviti(process_stats, demux_id): if row.get("I2"): index += "-" index += row["I2"] + if project_sample_stats.get(row.get("SampleName")): + if project_sample_stats[row["SampleName"]].get("percent_q30"): + percent_q30 = project_sample_stats[row["SampleName"]][ + "percent_q30" + ] laneBC["sample_data"].append( { "Lane": row.get("Lane", ""), @@ -836,6 +891,7 @@ def write_demuxfile_aviti(process_stats, demux_id): "Project": row.get("Project", ""), "Barcode sequence": index, "PF Clusters": row.get("NumPoloniesAssigned", "0"), + "% >= Q30bases": percent_q30, "% of thelane": row.get("PercentPoloniesAssigned", "0"), "Yield (Mbases)": str( float(row.get("Yield(Gb)", "0")) * 1000 @@ -853,7 +909,16 @@ def write_demuxfile_aviti(process_stats, demux_id): # Writes less undetermined info than undemultiplex_index.py. May cause problems downstreams with open(fname, "w") as csvfile: writer = csv.writer(csvfile) - writer.writerow(["Project", "Sample ID", "Lane", "# Reads", "Index"]) + writer.writerow( + [ + "Project", + "Sample ID", + "Lane", + "# Reads", + "Index", + "% of >= Q30 Bases (PF)", + ] + ) for entry in laneBC["sample_data"]: reads = entry["PF Clusters"] @@ -870,6 +935,7 @@ def write_demuxfile_aviti(process_stats, demux_id): entry["Lane"], reads, entry["Barcode sequence"], + entry["% >= Q30bases"], ] ) except Exception as e: From acaa6235220da43e32ce0f42d3644a9de43348e0 Mon Sep 17 00:00:00 2001 From: chuan-wang Date: Fri, 23 Aug 2024 15:34:15 +0200 Subject: [PATCH 2/3] Additional stats supported --- scripts/manage_demux_stats.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/scripts/manage_demux_stats.py b/scripts/manage_demux_stats.py index 2dc1c3cd..a95fbcd3 100644 --- a/scripts/manage_demux_stats.py +++ b/scripts/manage_demux_stats.py @@ -884,6 +884,18 @@ def write_demuxfile_aviti(process_stats, demux_id): percent_q30 = project_sample_stats[row["SampleName"]][ "percent_q30" ] + if project_sample_stats[row["SampleName"]].get( + "quality_score_mean" + ): + quality_score_mean = project_sample_stats[ + row["SampleName"] + ]["quality_score_mean"] + if project_sample_stats[row["SampleName"]].get( + "percent_mismatch" + ): + percent_mismatch = project_sample_stats[row["SampleName"]][ + "percent_mismatch" + ] laneBC["sample_data"].append( { "Lane": row.get("Lane", ""), @@ -891,8 +903,11 @@ def write_demuxfile_aviti(process_stats, demux_id): "Project": row.get("Project", ""), "Barcode sequence": index, "PF Clusters": row.get("NumPoloniesAssigned", "0"), - "% >= Q30bases": percent_q30, "% of thelane": row.get("PercentPoloniesAssigned", "0"), + "% >= Q30bases": percent_q30, + "Mean QualityScore": quality_score_mean, + "% Perfectbarcode": 100 - percent_mismatch, + "% One mismatchbarcode": percent_mismatch, "Yield (Mbases)": str( float(row.get("Yield(Gb)", "0")) * 1000 ), @@ -917,6 +932,10 @@ def write_demuxfile_aviti(process_stats, demux_id): "# Reads", "Index", "% of >= Q30 Bases (PF)", + "Mean QualityScore", + "% Perfectbarcode", + "% One mismatchbarcode", + "Yield (Mbases)", ] ) for entry in laneBC["sample_data"]: @@ -936,6 +955,10 @@ def write_demuxfile_aviti(process_stats, demux_id): reads, entry["Barcode sequence"], entry["% >= Q30bases"], + entry["Mean QualityScore"], + entry["% Perfectbarcode"], + entry["% One mismatchbarcode"], + entry["Yield (Mbases)"], ] ) except Exception as e: From 9baf7430c983d798e7cdb6851a4b636167ca1623 Mon Sep 17 00:00:00 2001 From: chuan-wang Date: Fri, 23 Aug 2024 15:40:00 +0200 Subject: [PATCH 3/3] Small refactor --- scripts/manage_demux_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/manage_demux_stats.py b/scripts/manage_demux_stats.py index a95fbcd3..408e3df8 100644 --- a/scripts/manage_demux_stats.py +++ b/scripts/manage_demux_stats.py @@ -836,7 +836,7 @@ def fetch_project_sample_stats(projects_path): else: problem_handler( "exit", - "The Samples folder is missing for fetching Q30 values", + "The Samples folder is missing for fetching stats", )