diff --git a/VERSIONLOG.md b/VERSIONLOG.md index fb27a235..0773d3d0 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -4,6 +4,10 @@ Add script for AVITI run manifest generation, re-organize repo to follow best-practice modularization and implement EPP wrapper. +## 20240823.2 + +Add function to fetch sample-level Q30 for AVITI + ## 20240823.1 Support AVITI in the BCL conversion step diff --git a/scripts/manage_demux_stats.py b/scripts/manage_demux_stats.py index cd5e8828..408e3df8 100644 --- a/scripts/manage_demux_stats.py +++ b/scripts/manage_demux_stats.py @@ -12,6 +12,7 @@ Written by Isak Sylvin; isak.sylvin@scilifelab.se""" import csv +import json import logging import os import re @@ -802,12 +803,50 @@ def write_demuxfile(process_stats, demux_id): return laneBC.sample_data +def fetch_project_sample_stats(projects_path): + unassigned_dir_name = "Unassigned" + + project_sample_stats = {} + + if os.path.exists(projects_path): + projects = os.listdir(projects_path) + if unassigned_dir_name in projects: + projects.remove(unassigned_dir_name) + for project in projects: + stats_json_path = os.path.join( + projects_path, project, f"{project}_RunStats.json" + ) + if os.path.exists(stats_json_path): + with open(stats_json_path) as stats_json: + project_sample_stats_raw = json.load(stats_json) + for sample_stats in project_sample_stats_raw["SampleStats"]: + sample_name = sample_stats["SampleName"] + percent_q30 = sample_stats["PercentQ30"] + quality_score_mean = sample_stats["QualityScoreMean"] + percent_mismatch = sample_stats["PercentMismatch"] + sample_yield = sample_stats["Yield"] + project_sample_stats[sample_name] = { + "percent_q30": percent_q30, + "quality_score_mean": quality_score_mean, + "percent_mismatch": percent_mismatch, + "sample_yield": sample_yield, + "project": project, + } + return project_sample_stats + else: + problem_handler( + "exit", + "The Samples folder is missing for fetching stats", + ) + + def write_demuxfile_aviti(process_stats, demux_id): """Creates demux_{FCID}.csv and attaches it to process""" # Includes windows drive letter support metadata_dir_name = "ngi-nas-ns" instrument_dir_name = "{}_data".format(process_stats["Instrument"]) + sample_dir_name = "Samples" lanebc_path = os.path.join( os.sep, @@ -818,6 +857,17 @@ def write_demuxfile_aviti(process_stats, demux_id): "IndexAssignment.csv", ) + projects_path = os.path.join( + os.sep, + "srv", + metadata_dir_name, + instrument_dir_name, + process_stats["Run ID"], + sample_dir_name, + ) + + project_sample_stats = fetch_project_sample_stats(projects_path) + try: laneBC = {} laneBC["sample_data"] = [] @@ -829,6 +879,23 @@ def write_demuxfile_aviti(process_stats, demux_id): if row.get("I2"): index += "-" index += row["I2"] + if project_sample_stats.get(row.get("SampleName")): + if project_sample_stats[row["SampleName"]].get("percent_q30"): + percent_q30 = project_sample_stats[row["SampleName"]][ + "percent_q30" + ] + if project_sample_stats[row["SampleName"]].get( + "quality_score_mean" + ): + quality_score_mean = project_sample_stats[ + row["SampleName"] + ]["quality_score_mean"] + if project_sample_stats[row["SampleName"]].get( + "percent_mismatch" + ): + percent_mismatch = project_sample_stats[row["SampleName"]][ + "percent_mismatch" + ] laneBC["sample_data"].append( { "Lane": row.get("Lane", ""), @@ -837,6 +904,10 @@ def write_demuxfile_aviti(process_stats, demux_id): "Barcode sequence": index, "PF Clusters": row.get("NumPoloniesAssigned", "0"), "% of thelane": row.get("PercentPoloniesAssigned", "0"), + "% >= Q30bases": percent_q30, + "Mean QualityScore": quality_score_mean, + "% Perfectbarcode": 100 - percent_mismatch, + "% One mismatchbarcode": percent_mismatch, "Yield (Mbases)": str( float(row.get("Yield(Gb)", "0")) * 1000 ), @@ -853,7 +924,20 @@ def write_demuxfile_aviti(process_stats, demux_id): # Writes less undetermined info than undemultiplex_index.py. May cause problems downstreams with open(fname, "w") as csvfile: writer = csv.writer(csvfile) - writer.writerow(["Project", "Sample ID", "Lane", "# Reads", "Index"]) + writer.writerow( + [ + "Project", + "Sample ID", + "Lane", + "# Reads", + "Index", + "% of >= Q30 Bases (PF)", + "Mean QualityScore", + "% Perfectbarcode", + "% One mismatchbarcode", + "Yield (Mbases)", + ] + ) for entry in laneBC["sample_data"]: reads = entry["PF Clusters"] @@ -870,6 +954,11 @@ def write_demuxfile_aviti(process_stats, demux_id): entry["Lane"], reads, entry["Barcode sequence"], + entry["% >= Q30bases"], + entry["Mean QualityScore"], + entry["% Perfectbarcode"], + entry["% One mismatchbarcode"], + entry["Yield (Mbases)"], ] ) except Exception as e: