SciLifeLab · chuan-wang · Aug 26, 2024 · Aug 23, 2024 · Aug 23, 2024 · Aug 23, 2024
diff --git a/VERSIONLOG.md b/VERSIONLOG.md
@@ -1,5 +1,9 @@
 # Scilifelab_epps Version Log
 
+## 20240823.2
+
+Add function to fetch sample-level Q30 for AVITI
+
 ## 20240823.1
 
 Support AVITI in the BCL conversion step

diff --git a/scripts/manage_demux_stats.py b/scripts/manage_demux_stats.py
@@ -12,6 +12,7 @@
 Written by Isak Sylvin; [email protected]"""
 
 import csv
+import json
 import logging
 import os
 import re
@@ -802,12 +803,50 @@ def write_demuxfile(process_stats, demux_id):
     return laneBC.sample_data
 
 
+def fetch_project_sample_stats(projects_path):
+    """Collect per-sample stats from every <project>_RunStats.json.
+
+    Scans each entry of ``projects_path`` except "Unassigned" and returns a
+    dict keyed by sample name (percent_q30, quality_score_mean,
+    percent_mismatch, sample_yield, project). A missing folder is reported via
+    ``problem_handler("exit", ...)``; if that returns, an empty dict results.
+    """
+    unassigned_dir_name = "Unassigned"
+    project_sample_stats = {}
+    if not os.path.exists(projects_path):
+        problem_handler(
+            "exit",
+            "The Samples folder is missing for fetching stats",
+        )
+        return project_sample_stats
+
+    for project in os.listdir(projects_path):
+        stats_json_path = os.path.join(
+            projects_path, project, f"{project}_RunStats.json"
+        )
+        if project == unassigned_dir_name or not os.path.exists(stats_json_path):
+            continue
+        # JSON is UTF-8 by specification; do not rely on the locale default.
+        with open(stats_json_path, encoding="utf-8") as stats_json:
+            project_sample_stats_raw = json.load(stats_json)
+        for sample_stats in project_sample_stats_raw["SampleStats"]:
+            project_sample_stats[sample_stats["SampleName"]] = {
+                "percent_q30": sample_stats["PercentQ30"],
+                "quality_score_mean": sample_stats["QualityScoreMean"],
+                "percent_mismatch": sample_stats["PercentMismatch"],
+                "sample_yield": sample_stats["Yield"],
+                "project": project,
+            }
+    return project_sample_stats
+
+
 def write_demuxfile_aviti(process_stats, demux_id):
     """Creates demux_{FCID}.csv and attaches it to process"""
     # Includes windows drive letter support
 
     metadata_dir_name = "ngi-nas-ns"
     instrument_dir_name = "{}_data".format(process_stats["Instrument"])
+    sample_dir_name = "Samples"
 
     lanebc_path = os.path.join(
         os.sep,
@@ -818,6 +857,17 @@ def write_demuxfile_aviti(process_stats, demux_id):
         "IndexAssignment.csv",
     )
 
+    projects_path = os.path.join(
+        os.sep,
+        "srv",
+        metadata_dir_name,
+        instrument_dir_name,
+        process_stats["Run ID"],
+        sample_dir_name,
+    )
+
+    project_sample_stats = fetch_project_sample_stats(projects_path)
+
     try:
         laneBC = {}
         laneBC["sample_data"] = []
@@ -829,6 +879,23 @@ def write_demuxfile_aviti(process_stats, demux_id):
                     if row.get("I2"):
                         index += "-"
                         index += row["I2"]
+                    if project_sample_stats.get(row.get("SampleName")):
+                        if project_sample_stats[row["SampleName"]].get("percent_q30"):
+                            percent_q30 = project_sample_stats[row["SampleName"]][
+                                "percent_q30"
+                            ]
+                        if project_sample_stats[row["SampleName"]].get(
+                            "quality_score_mean"
+                        ):
+                            quality_score_mean = project_sample_stats[
+                                row["SampleName"]
+                            ]["quality_score_mean"]
+                        if project_sample_stats[row["SampleName"]].get(
+                            "percent_mismatch"
+                        ):
+                            percent_mismatch = project_sample_stats[row["SampleName"]][
+                                "percent_mismatch"
+                            ]
                     laneBC["sample_data"].append(
                         {
                             "Lane": row.get("Lane", ""),
@@ -837,6 +904,10 @@ def write_demuxfile_aviti(process_stats, demux_id):
                             "Barcode sequence": index,
                             "PF Clusters": row.get("NumPoloniesAssigned", "0"),
                             "% of thelane": row.get("PercentPoloniesAssigned", "0"),
+                            "% >= Q30bases": percent_q30,
+                            "Mean QualityScore": quality_score_mean,
+                            "% Perfectbarcode": 100 - percent_mismatch,
+                            "% One mismatchbarcode": percent_mismatch,
                             "Yield (Mbases)": str(
                                 float(row.get("Yield(Gb)", "0")) * 1000
                             ),
@@ -853,7 +924,20 @@ def write_demuxfile_aviti(process_stats, demux_id):
     # Writes less undetermined info than undemultiplex_index.py. May cause problems downstreams
     with open(fname, "w") as csvfile:
         writer = csv.writer(csvfile)
-        writer.writerow(["Project", "Sample ID", "Lane", "# Reads", "Index"])
+        writer.writerow(
+            [
+                "Project",
+                "Sample ID",
+                "Lane",
+                "# Reads",
+                "Index",
+                "% of >= Q30 Bases (PF)",
+                "Mean QualityScore",
+                "% Perfectbarcode",
+                "% One mismatchbarcode",
+                "Yield (Mbases)",
+            ]
+        )
         for entry in laneBC["sample_data"]:
             reads = entry["PF Clusters"]
 
@@ -870,6 +954,11 @@ def write_demuxfile_aviti(process_stats, demux_id):
                         entry["Lane"],
                         reads,
                         entry["Barcode sequence"],
+                        entry["% >= Q30bases"],
+                        entry["Mean QualityScore"],
+                        entry["% Perfectbarcode"],
+                        entry["% One mismatchbarcode"],
+                        entry["Yield (Mbases)"],
                     ]
                 )
             except Exception as e: