Skip to content

Commit

Permalink
Merge branch 'master' into AKe-dev
Browse files Browse the repository at this point in the history
  • Loading branch information
kedhammar authored Aug 27, 2024
2 parents 5ac1299 + 89873cb commit 3ea2f1b
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 1 deletion.
4 changes: 4 additions & 0 deletions VERSIONLOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

Add script for AVITI run manifest generation, re-organize repo to follow best-practice modularization and implement EPP wrapper.

## 20240823.2

Add function to fetch sample-level Q30 for AVITI

## 20240823.1

Support AVITI in the BCL conversion step
Expand Down
91 changes: 90 additions & 1 deletion scripts/manage_demux_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
Written by Isak Sylvin; [email protected]"""

import csv
import json
import logging
import os
import re
Expand Down Expand Up @@ -802,12 +803,50 @@ def write_demuxfile(process_stats, demux_id):
return laneBC.sample_data


def fetch_project_sample_stats(projects_path):
    """Collect sample-level run statistics from each project's RunStats JSON.

    Every entry of ``projects_path`` except ``Unassigned`` is treated as a
    project. For each one, ``<projects_path>/<project>/<project>_RunStats.json``
    is read if it exists, and every item of its ``SampleStats`` list is
    recorded under the sample's name.

    Args:
        projects_path: Path of the directory holding one sub-directory per
            project.

    Returns:
        A dict mapping sample name to a dict with the keys ``percent_q30``,
        ``quality_score_mean``, ``percent_mismatch``, ``sample_yield`` and
        ``project``. A metric that is absent from the JSON is stored as
        ``None`` instead of aborting the whole fetch. If the same sample name
        occurs more than once, the entry read last wins.
    """
    unassigned_dir_name = "Unassigned"

    project_sample_stats = {}

    # isdir rather than exists: a regular file at this path would otherwise
    # slip through and make os.listdir raise NotADirectoryError.
    if not os.path.isdir(projects_path):
        problem_handler(
            "exit",
            "The Samples folder is missing for fetching stats",
        )
        # NOTE(review): problem_handler("exit", ...) presumably terminates the
        # script; if it ever returns, hand back the empty dict so the caller's
        # .get() lookups keep working.
        return project_sample_stats

    for project in os.listdir(projects_path):
        if project == unassigned_dir_name:
            continue

        stats_json_path = os.path.join(
            projects_path, project, f"{project}_RunStats.json"
        )
        # Entries without a stats file (including stray non-directories) are
        # skipped silently.
        if not os.path.isfile(stats_json_path):
            continue

        with open(stats_json_path, encoding="utf-8") as stats_json:
            project_sample_stats_raw = json.load(stats_json)

        for sample_stats in project_sample_stats_raw.get("SampleStats") or []:
            sample_name = sample_stats.get("SampleName")
            if sample_name is None:
                # Without a name there is no key to store the entry under.
                logging.warning(
                    "Skipping sample entry without 'SampleName' in %s",
                    stats_json_path,
                )
                continue

            # .get() keeps one incomplete sample entry from raising KeyError
            # and discarding the statistics of every other sample.
            project_sample_stats[sample_name] = {
                "percent_q30": sample_stats.get("PercentQ30"),
                "quality_score_mean": sample_stats.get("QualityScoreMean"),
                "percent_mismatch": sample_stats.get("PercentMismatch"),
                "sample_yield": sample_stats.get("Yield"),
                "project": project,
            }

    return project_sample_stats


def write_demuxfile_aviti(process_stats, demux_id):
"""Creates demux_{FCID}.csv and attaches it to process"""
# Includes windows drive letter support

metadata_dir_name = "ngi-nas-ns"
instrument_dir_name = "{}_data".format(process_stats["Instrument"])
sample_dir_name = "Samples"

lanebc_path = os.path.join(
os.sep,
Expand All @@ -818,6 +857,17 @@ def write_demuxfile_aviti(process_stats, demux_id):
"IndexAssignment.csv",
)

projects_path = os.path.join(
os.sep,
"srv",
metadata_dir_name,
instrument_dir_name,
process_stats["Run ID"],
sample_dir_name,
)

project_sample_stats = fetch_project_sample_stats(projects_path)

try:
laneBC = {}
laneBC["sample_data"] = []
Expand All @@ -829,6 +879,23 @@ def write_demuxfile_aviti(process_stats, demux_id):
if row.get("I2"):
index += "-"
index += row["I2"]
if project_sample_stats.get(row.get("SampleName")):
if project_sample_stats[row["SampleName"]].get("percent_q30"):
percent_q30 = project_sample_stats[row["SampleName"]][
"percent_q30"
]
if project_sample_stats[row["SampleName"]].get(
"quality_score_mean"
):
quality_score_mean = project_sample_stats[
row["SampleName"]
]["quality_score_mean"]
if project_sample_stats[row["SampleName"]].get(
"percent_mismatch"
):
percent_mismatch = project_sample_stats[row["SampleName"]][
"percent_mismatch"
]
laneBC["sample_data"].append(
{
"Lane": row.get("Lane", ""),
Expand All @@ -837,6 +904,10 @@ def write_demuxfile_aviti(process_stats, demux_id):
"Barcode sequence": index,
"PF Clusters": row.get("NumPoloniesAssigned", "0"),
"% of thelane": row.get("PercentPoloniesAssigned", "0"),
"% >= Q30bases": percent_q30,
"Mean QualityScore": quality_score_mean,
"% Perfectbarcode": 100 - percent_mismatch,
"% One mismatchbarcode": percent_mismatch,
"Yield (Mbases)": str(
float(row.get("Yield(Gb)", "0")) * 1000
),
Expand All @@ -853,7 +924,20 @@ def write_demuxfile_aviti(process_stats, demux_id):
# Writes less undetermined info than undemultiplex_index.py. May cause problems downstream.
with open(fname, "w") as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["Project", "Sample ID", "Lane", "# Reads", "Index"])
writer.writerow(
[
"Project",
"Sample ID",
"Lane",
"# Reads",
"Index",
"% of >= Q30 Bases (PF)",
"Mean QualityScore",
"% Perfectbarcode",
"% One mismatchbarcode",
"Yield (Mbases)",
]
)
for entry in laneBC["sample_data"]:
reads = entry["PF Clusters"]

Expand All @@ -870,6 +954,11 @@ def write_demuxfile_aviti(process_stats, demux_id):
entry["Lane"],
reads,
entry["Barcode sequence"],
entry["% >= Q30bases"],
entry["Mean QualityScore"],
entry["% Perfectbarcode"],
entry["% One mismatchbarcode"],
entry["Yield (Mbases)"],
]
)
except Exception as e:
Expand Down

0 comments on commit 3ea2f1b

Please sign in to comment.