Skip to content

Commit

Permalink
Merge pull request #368 from chuan-wang/master
Browse files Browse the repository at this point in the history
Updates index checker and AVITI stats parser
  • Loading branch information
chuan-wang authored Oct 1, 2024
2 parents a632166 + 61671f3 commit eb50872
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 80 deletions.
8 changes: 8 additions & 0 deletions VERSIONLOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Scilifelab_epps Version Log

## 20241001.1

Update index checker EPP to capture invalid bases

## 20240930.1

For AVITI manifest generation, assume that idx2 is a UMI when it spans more than 12 cycles and no idx2 sequence is parsed, and add Ns to the manifest accordingly.
Expand All @@ -8,6 +12,10 @@ For AVITI manifest generation, assume idx2 > 12 cycles and no idx2 parsed means

Add 10X steps to comments-to-running-notes config.

## 20240924.2

Update method for fetching AVITI stats in the BCL conversion step

## 20240924.1

Fix bug with data type in frag_an_driver_gen
Expand Down
22 changes: 15 additions & 7 deletions scripts/index_distance_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

# Pre-compile regexes in global scope:
# IDX_PAT: a run of at least four A/T/C/G bases, optionally followed by Ns,
# then an optional hyphen and a second (possibly empty) run of A/T/C/G bases.
IDX_PAT = re.compile("([ATCG]{4,}N*)-?([ATCG]*)")
# VALIDBASES_PAT: the whole string consists only of A, T, C, G, N or hyphen.
VALIDBASES_PAT = re.compile(r"^[ATCGN\-]+$")
# TENX_SINGLE_PAT: "SI-GA-" or "SI-NA-" followed by a letter A-H, a digit 1-9
# and an optional digit 0-2.
TENX_SINGLE_PAT = re.compile("SI-(?:GA|NA)-[A-H][1-9][0-2]?")
# TENX_DUAL_PAT: same suffix shape as above with the TT/NT/NN/TN/TS prefixes.
TENX_DUAL_PAT = re.compile("SI-(?:TT|NT|NN|TN|TS)-[A-H][1-9][0-2]?")
# SMARTSEQ_PAT: "SMARTSEQ" with an optional digit 1-9, a hyphen, a one- or
# two-digit number not starting with 0, and a letter A-P.
SMARTSEQ_PAT = re.compile("SMARTSEQ[1-9]?-[1-9][0-9]?[A-P]")
Expand Down Expand Up @@ -549,14 +550,21 @@ def find_barcode(sample_idxs, sample, process):
reagent_label_name = art.reagent_labels[0].upper()
if reagent_label_name and reagent_label_name != "NOINDEX":
if (
IDX_PAT.findall(reagent_label_name)
and len(IDX_PAT.findall(reagent_label_name)) > 1
) or (
not (
(
IDX_PAT.findall(reagent_label_name)
or TENX_SINGLE_PAT.findall(reagent_label_name)
or TENX_DUAL_PAT.findall(reagent_label_name)
or SMARTSEQ_PAT.findall(reagent_label_name)
and len(IDX_PAT.findall(reagent_label_name)) > 1
)
or (
IDX_PAT.findall(reagent_label_name)
and not VALIDBASES_PAT.findall(reagent_label_name)
)
or (
not (
IDX_PAT.findall(reagent_label_name)
or TENX_SINGLE_PAT.findall(reagent_label_name)
or TENX_DUAL_PAT.findall(reagent_label_name)
or SMARTSEQ_PAT.findall(reagent_label_name)
)
)
):
sys.stderr.write(
Expand Down
87 changes: 14 additions & 73 deletions scripts/manage_demux_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
Written by Isak Sylvin; [email protected]"""

import csv
import json
import logging
import os
import re
Expand Down Expand Up @@ -817,50 +816,12 @@ def write_demuxfile(process_stats, demux_id):
return laneBC.sample_data


def fetch_project_sample_stats(projects_path):
    """Collect per-sample run statistics from each project's RunStats JSON.

    Every entry of ``projects_path`` except the one named "Unassigned" is
    treated as a project. For each project, the file
    ``<projects_path>/<project>/<project>_RunStats.json`` is read when it
    exists, and every record under its "SampleStats" key adds one entry to
    the result. Projects without such a file are silently skipped.

    Args:
        projects_path: Path of the directory that holds the project entries.

    Returns:
        A dict keyed by sample name. Each value is a dict with the keys
        "percent_q30", "quality_score_mean", "percent_mismatch",
        "sample_yield" and "project". When ``projects_path`` does not exist,
        ``problem_handler`` is called with the "exit" type; if that call
        returns, this function returns None.

    Raises:
        KeyError: If a stats file lacks "SampleStats" or a record lacks one
            of the expected fields.
    """
    unassigned_dir_name = "Unassigned"

    # Guard clause: without the folder there is nothing to read.
    if not os.path.exists(projects_path):
        problem_handler(
            "exit",
            "The Samples folder is missing for fetching stats",
        )
        return None

    project_sample_stats = {}
    for project in os.listdir(projects_path):
        if project == unassigned_dir_name:
            continue

        stats_json_path = os.path.join(
            projects_path, project, f"{project}_RunStats.json"
        )
        if not os.path.exists(stats_json_path):
            continue

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(stats_json_path, encoding="utf-8") as stats_json:
            project_sample_stats_raw = json.load(stats_json)

        for sample_stats in project_sample_stats_raw["SampleStats"]:
            sample_name = sample_stats["SampleName"]
            project_sample_stats[sample_name] = {
                "percent_q30": sample_stats["PercentQ30"],
                "quality_score_mean": sample_stats["QualityScoreMean"],
                "percent_mismatch": sample_stats["PercentMismatch"],
                "sample_yield": sample_stats["Yield"],
                "project": project,
            }
    return project_sample_stats


def write_demuxfile_aviti(process_stats, demux_id):
"""Creates demux_{FCID}.csv and attaches it to process"""
# Includes windows drive letter support

metadata_dir_name = "ngi-nas-ns"
instrument_dir_name = "{}_data".format(process_stats["Instrument"])
sample_dir_name = "Samples"

lanebc_path = os.path.join(
os.sep,
Expand All @@ -871,17 +832,6 @@ def write_demuxfile_aviti(process_stats, demux_id):
"IndexAssignment.csv",
)

projects_path = os.path.join(
os.sep,
"srv",
metadata_dir_name,
instrument_dir_name,
process_stats["Run ID"],
sample_dir_name,
)

project_sample_stats = fetch_project_sample_stats(projects_path)

try:
laneBC = {}
laneBC["sample_data"] = []
Expand All @@ -893,35 +843,26 @@ def write_demuxfile_aviti(process_stats, demux_id):
if row.get("I2"):
index += "-"
index += row["I2"]
if project_sample_stats.get(row.get("SampleName")):
if project_sample_stats[row["SampleName"]].get("percent_q30"):
percent_q30 = project_sample_stats[row["SampleName"]][
"percent_q30"
]
if project_sample_stats[row["SampleName"]].get(
"quality_score_mean"
):
quality_score_mean = project_sample_stats[
row["SampleName"]
]["quality_score_mean"]
if project_sample_stats[row["SampleName"]].get(
"percent_mismatch"
):
percent_mismatch = project_sample_stats[row["SampleName"]][
"percent_mismatch"
]

laneBC["sample_data"].append(
{
"Lane": row.get("Lane", ""),
"Sample": row.get("SampleName", ""),
"Project": row.get("Project", ""),
"Barcode sequence": index,
"PF Clusters": row.get("NumPoloniesAssigned", "0"),
"% of thelane": row.get("PercentPoloniesAssigned", "0"),
"% >= Q30bases": percent_q30,
"Mean QualityScore": quality_score_mean,
"% Perfectbarcode": 100 - percent_mismatch,
"% One mismatchbarcode": percent_mismatch,
"PF Clusters": int(row.get("NumPoloniesAssigned", "0")),
"% of thelane": float(
row.get("PercentPoloniesAssigned", "0")
),
"% >= Q30bases": float(row.get("PercentQ30", "0")),
"Mean QualityScore": float(
row.get("QualityScoreMean", "0")
),
"% Perfectbarcode": 100
- float(row.get("PercentMismatch", "0")),
"% One mismatchbarcode": float(
row.get("PercentMismatch", "0")
),
"Yield (Mbases)": str(
float(row.get("Yield(Gb)", "0")) * 1000
),
Expand Down

0 comments on commit eb50872

Please sign in to comment.