Skip to content

Commit

Permalink
handle special idxs
Browse files Browse the repository at this point in the history
  • Loading branch information
kedhammar committed Sep 11, 2024
1 parent ee7aa17 commit 6be8ca2
Showing 1 changed file with 74 additions and 28 deletions.
102 changes: 74 additions & 28 deletions scripts/generate_aviti_run_manifest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env python

import json
import logging
import os
import re
Expand All @@ -13,12 +14,18 @@
from genologics.lims import Lims
from Levenshtein import hamming as distance

from data.Chromium_10X_indexes import Chromium_10X_indexes
from scilifelab_epps.epp import upload_file
from scilifelab_epps.wrapper import epp_decorator
from scripts.generate_minknow_samplesheet import get_pool_sample_label_mapping

# Timestamp string captured once, when this module is loaded (format: YYMMDD_HHMMSS)
TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S")
# Matches a run of four or more A/C/G/T characters, optionally followed by
# "-" and a second run of four or more A/C/G/T characters
LABEL_SEQ_SUBSTRING = re.compile(r"[ACGT]{4,}(-[ACGT]{4,})?")

# Regexes used to classify reagent labels, compiled once at module load.

# Plain index sequence(s): group 1 is four or more A/T/C/G followed by any
# number of N, group 2 is an optional second A/T/C/G run after an optional "-"
IDX_PAT = re.compile(r"([ATCG]{4,}N*)-?([ATCG]*)")
# 10X single-index names, "SI-GA-..." or "SI-NA-..." followed by a well-like code
TENX_SINGLE_PAT = re.compile(r"SI-(?:GA|NA)-[A-H][1-9][0-2]?")
# 10X dual-index names, "SI-TT-...", "SI-NT-...", "SI-NN-...", "SI-TN-..." or "SI-TS-..."
TENX_DUAL_PAT = re.compile(r"SI-(?:TT|NT|NN|TN|TS)-[A-H][1-9][0-2]?")
# SMARTSEQ names, e.g. "SMARTSEQ3-12A": optional digit, "-", one or two digits, letter A-P
SMARTSEQ_PAT = re.compile(r"SMARTSEQ[1-9]?-[1-9][0-9]?[A-P]")

# Set up Element PhiX control sets, keys are options in LIMS dropdown UDF
PHIX_SETS = {
Expand Down Expand Up @@ -51,6 +58,13 @@
},
}

# Read the SMARTSEQ3 index table from its JSON file when this module is loaded.
# NOTE(review): the path is absolute and hard-coded, so loading this module
# raises if the file is not present at exactly this location.
SMARTSEQ3_indexes_json = (
    "/opt/gls/clarity/users/glsai/repos/scilifelab_epps/data/SMARTSEQ3_indexes.json"
)
with open(SMARTSEQ3_indexes_json) as file:
    # Parse straight from the file handle
    SMARTSEQ3_indexes = json.load(file)


def get_flowcell_id(process: Process) -> str:
flowcell_ids = [
Expand Down Expand Up @@ -105,6 +119,47 @@ def get_settings_section() -> str:
return settings_section


def idxs_from_label(label: str) -> list[str | tuple[str, str]]:
    """Translate a LIMS reagent label into the index sequence(s) it stands for.

    The label is tested against the known label types in a fixed order:
    10X single index, 10X dual index, SMARTSEQ, "NoIndex"/empty, and finally
    a plain index sequence. The first type that matches decides the result.

    Args:
        label: Reagent label as stored in LIMS.

    Returns:
        A list whose elements are either a single index (``str``) or a dual
        index pair (``tuple`` of i7 and i5 strings). Labels that expand to
        several indexes yield several elements.

    Raises:
        AssertionError: If the label is "NoIndex" or empty (commas ignored,
            case-insensitive), or if no index can be parsed from it.
        KeyError: Presumably, if a matched 10X or SMARTSEQ name is missing
            from its lookup table (assumes the tables are plain dicts).
    """

    # 10X single index: the looked-up entry is a collection of index
    # sequences, each of which becomes its own element.
    tenx_single = TENX_SINGLE_PAT.search(label)
    if tenx_single:
        return list(Chromium_10X_indexes[tenx_single.group(0)])

    # 10X dual index: look up the entry by the matched name first and only
    # then pick its two members. Subscripting the matched string itself
    # would use single characters of the name as lookup keys.
    # NOTE(review): assumes entry[0] is i7 and entry[1] is i5 -- confirm.
    tenx_dual = TENX_DUAL_PAT.search(label)
    if tenx_dual:
        entry = Chromium_10X_indexes[tenx_dual.group(0)]
        i7_idx, i5_idx = entry[0], entry[1]
        return [(i7_idx, revcomp(i5_idx))]

    # SMARTSEQ: every i7/i5 combination, with the i5 reverse-complemented.
    # The table is keyed by the full label, not by the matched substring.
    if SMARTSEQ_PAT.search(label):
        i7_idxs, i5_idxs = SMARTSEQ3_indexes[label][0], SMARTSEQ3_indexes[label][1]
        return [(i7_idx, revcomp(i5_idx)) for i7_idx in i7_idxs for i5_idx in i5_idxs]

    # NoIndex cases
    if label.replace(",", "").upper() in ("NOINDEX", ""):
        raise AssertionError("NoIndex cases not allowed.")

    # Ordinary indexes: IDX_PAT captures the first and (possibly empty)
    # second index as separate groups, so read the groups directly instead
    # of searching the match for "-".
    idx_match = IDX_PAT.search(label)
    if idx_match:
        idx1, idx2 = idx_match.groups()
        if idx2:
            return [(idx1, idx2)]
        return [idx1]

    raise AssertionError(f"Could not parse index from '{label}'.")


def get_samples_section(process: Process) -> str:
"""Generate the [SAMPLES] section of the AVITI run manifest and return it as a string."""

Expand Down Expand Up @@ -134,28 +189,13 @@ def get_samples_section(process: Process) -> str:
), "Unequal number of samples and reagent labels."

sample2label: dict[str, str] = get_pool_sample_label_mapping(art_out)
samples = art_out.samples
labels = art_out.reagent_labels

assert len(set(labels)) == len(labels), "Detected non-unique reagent labels."
assert len(set(art_out.reagent_labels)) == len(
art_out.reagent_labels
), "Detected non-unique reagent labels."

samples = art_out.samples
# Iterate over samples
for sample in samples:
lims_label = sample2label[sample.name]

# Parse sample index
label_seq_match = re.search(LABEL_SEQ_SUBSTRING, lims_label)
assert (
label_seq_match is not None
), f"Could not parse label sequence from {lims_label}"
label_seq = label_seq_match.group(0)

if "-" in label_seq:
index1, index2 = label_seq.split("-")
else:
index1 = label_seq
index2 = ""

# Project name and sequencing setup
if sample.project:
project = sample.project.name.replace(".", "__").replace(",", "")
Expand All @@ -164,15 +204,21 @@ def get_samples_section(process: Process) -> str:
project = "Control"
seq_setup = "0-0"

row = {}
row["SampleName"] = sample.name
row["Index1"] = index1
row["Index2"] = index2
row["Lane"] = lane
row["Project"] = project
row["Recipe"] = seq_setup
# Add row(s), depending on index type
lims_label = sample2label[sample.name]
for idx in idxs_from_label(lims_label):
row = {}
row["SampleName"] = sample.name
if isinstance(idx, tuple):
row["Index1"], row["Index2"] = idx
else:
row["Index1"] = idx
row["Index2"] = ""
row["Lane"] = lane
row["Project"] = project
row["Recipe"] = seq_setup

lane_rows.append(row)
lane_rows.append(row)

# Add PhiX controls if added:
phix_loaded: bool = art_out.udf["% phiX"] != 0
Expand Down

0 comments on commit 6be8ca2

Please sign in to comment.