Skip to content

Commit

Permalink
Merge pull request #344 from chuan-wang/master
Browse files Browse the repository at this point in the history
Support AVITI in the BCL conversion step
  • Loading branch information
chuan-wang authored Aug 23, 2024
2 parents 4c65eee + 4a443b4 commit accbef8
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 2 deletions.
4 changes: 4 additions & 0 deletions VERSIONLOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Scilifelab_epps Version Log

## 20240823.1

Support AVITI in the BCL conversion step

## 20240822.1

New EPP scripts for parsing AVITI run parameters and stats
Expand Down
2 changes: 1 addition & 1 deletion scripts/aviti_run_parameter_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def set_step_udfs(process, run_dir):
process.udf["Index Read 2"] = run_parameters["Cycles"].get("I2", 0)

process.udf["Read Order"] = run_parameters.get("ReadOrder")
process.udf["Throughut Selection"] = run_parameters.get("ThroughputSelection")
process.udf["Throughput Selection"] = run_parameters.get("ThroughputSelection")
process.udf["Kit Configuration"] = run_parameters.get("KitConfiguration")
process.udf["Preparation Workflow"] = run_parameters.get("PreparationWorkflow")
process.udf["Chemistry Version"] = run_parameters.get("ChemistryVersion")
Expand Down
101 changes: 100 additions & 1 deletion scripts/manage_demux_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def get_process_stats(demux_process):
"AUTOMATED - NovaSeq Run (NovaSeq 6000 v2.0)",
"Illumina Sequencing (NextSeq) v1.0",
"NovaSeqXPlus Run v1.0",
"AVITI Run v1.0",
}
try:
# Query LIMS for all steps containing the first input artifact of this step and match to the set of sequencing steps
Expand Down Expand Up @@ -121,6 +122,21 @@ def get_process_stats(demux_process):
)
proc_stats["Paired"] = True if seq_process.udf.get("Read 2 Cycles") else False

elif "AVITI Run" in seq_process.type.name:
try:
proc_stats["Chemistry"] = "AVITI" + seq_process.udf["Throughput Selection"]
except Exception as e:
problem_handler(
"exit", f"No flowcell version set in sequencing step: {str(e)}"
)
proc_stats["Instrument"] = "AVITI"
proc_stats["Read Length"] = (
max(seq_process.udf["Read 1 Cycles"], seq_process.udf["Read 2 Cycles"])
if seq_process.udf.get("Read 2 Cycles")
else seq_process.udf["Read 1 Cycles"]
)
proc_stats["Paired"] = True if seq_process.udf.get("Read 2 Cycles") else False

else:
problem_handler("exit", "Unhandled workflow step (run type)")
logger.info("Run type/chemistry set to {}".format(proc_stats["Chemistry"]))
Expand Down Expand Up @@ -248,6 +264,7 @@ def set_sample_values(demux_process, parser_struct, process_stats):
"AUTOMATED - NovaSeq Run (NovaSeq 6000 v2.0)",
"Illumina Sequencing (NextSeq) v1.0",
"NovaSeqXPlus Run v1.0",
"AVITI Run v1.0",
}
seq_process = lims.get_processes(
inputartifactlimsid=demux_process.all_inputs()[0].id, type=seq_processes
Expand Down Expand Up @@ -443,6 +460,7 @@ def set_sample_values(demux_process, parser_struct, process_stats):
"AUTOMATED - NovaSeq Run (NovaSeq 6000 v2.0)",
"Illumina Sequencing (NextSeq) v1.0",
"NovaSeqXPlus Run v1.0",
"AVITI Run v1.0",
]:
try:
for inp in seq_process.all_outputs():
Expand Down Expand Up @@ -784,6 +802,84 @@ def write_demuxfile(process_stats, demux_id):
return laneBC.sample_data


def write_demuxfile_aviti(process_stats, demux_id):
    """Write the AVITI demux statistics CSV and return the parsed sample rows.

    Reads ``IndexAssignment.csv`` from
    ``<os.sep>srv/ngi-nas-ns/<Instrument>_data/<Run ID>/`` and writes
    ``<demux_id>_demuxstats_<Flow Cell ID>.csv`` (relative path, i.e. in the
    current working directory) with the columns
    ``Project, Sample ID, Lane, # Reads, Index``.

    Args:
        process_stats: Mapping providing the "Instrument", "Run ID",
            "Flow Cell ID" and "Paired" entries.
        demux_id: Prefix used in the name of the output file.

    Returns:
        A list with one dict per retained row of ``IndexAssignment.csv``,
        using the keys "Lane", "Sample", "Project", "Barcode sequence",
        "PF Clusters", "% of thelane" and "Yield (Mbases)". Rows whose
        "Lane" value contains "+" are left out.

    Any failure while reading the input, or while converting/writing a row,
    is reported through ``problem_handler("exit", ...)``.
    """
    metadata_dir_name = "ngi-nas-ns"
    instrument_dir_name = "{}_data".format(process_stats["Instrument"])

    lanebc_path = os.path.join(
        os.sep,
        "srv",
        metadata_dir_name,
        instrument_dir_name,
        process_stats["Run ID"],
        "IndexAssignment.csv",
    )

    laneBC = {"sample_data": []}
    try:
        # newline="" is what the csv module expects for its file objects, so
        # that it can handle line endings (incl. quoted newlines) itself.
        with open(lanebc_path, newline="") as lanebc_file:
            for row in csv.DictReader(lanebc_file):
                # NOTE(review): rows with "+" in Lane are presumably
                # multi-lane aggregates (e.g. "1+2") -- confirm.
                if "+" in row["Lane"]:
                    continue

                # Dual indexes are reported as "<I1>-<I2>".
                index = row.get("I1", "")
                if row.get("I2"):
                    index += "-" + row["I2"]

                laneBC["sample_data"].append(
                    {
                        "Lane": row.get("Lane", ""),
                        "Sample": row.get("SampleName", ""),
                        "Project": row.get("Project", ""),
                        "Barcode sequence": index,
                        "PF Clusters": row.get("NumPoloniesAssigned", "0"),
                        # Key spelling is kept as-is; other code may look it
                        # up under exactly this name.
                        "% of thelane": row.get("PercentPoloniesAssigned", "0"),
                        # Input column is in Gb, the stored value in Mb.
                        "Yield (Mbases)": str(
                            float(row.get("Yield(Gb)", "0")) * 1000
                        ),
                    }
                )
    except Exception as e:
        problem_handler(
            "exit",
            f"Unable to fetch IndexAssignment.csv from {lanebc_path}: {str(e)}",
        )

    fname = "{}_demuxstats_{}.csv".format(demux_id, process_stats["Flow Cell ID"])

    # Writes less undetermined info than undemultiplex_index.py. May cause problems downstream
    with open(fname, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Project", "Sample ID", "Lane", "# Reads", "Index"])
        for entry in laneBC["sample_data"]:
            try:
                # The count conversion lives inside the try so that a blank
                # or malformed value is reported via problem_handler instead
                # of surfacing as an unhandled exception.
                reads = int(entry["PF Clusters"].replace(",", ""))
                if process_stats["Paired"]:
                    # Paired-end runs yield two reads per cluster.
                    reads *= 2

                writer.writerow(
                    [
                        entry["Project"],
                        entry["Sample"],
                        entry["Lane"],
                        reads,
                        entry["Barcode sequence"],
                    ]
                )
            except Exception as e:
                problem_handler(
                    "exit",
                    f"Flowcell parser is unable to fetch all necessary fields for demux file: {str(e)}",
                )
    return laneBC["sample_data"]


def main(process_lims_id, demux_id, log_id):
# Sets up logger
basic_name = f"{log_id}_logfile.txt"
Expand Down Expand Up @@ -813,7 +909,10 @@ def main(process_lims_id, demux_id, log_id):
fill_process_fields(demux_process, process_stats)

# Create the demux output file
parser_struct = write_demuxfile(process_stats, demux_id)
if "AVITI" in demux_process.process.type.name:
parser_struct = write_demuxfile_aviti(process_stats, demux_id)
else:
parser_struct = write_demuxfile(process_stats, demux_id)

# Alters artifacts
set_sample_values(demux_process, parser_struct, process_stats)
Expand Down
15 changes: 15 additions & 0 deletions scripts/manage_demux_stats_thresholds.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def __init__(self, instrument, chemistry, paired, read_length):
"NovaSeq",
"NextSeq",
"NovaSeqXPlus",
"AVITI",
]
self.valid_chemistry = [
"MiSeq",
Expand All @@ -36,6 +37,7 @@ def __init__(self, instrument, chemistry, paired, read_length):
"10B",
"1.5B",
"25B",
"AVITI High",
]

if (
Expand Down Expand Up @@ -105,6 +107,15 @@ def set_Q30(self):
elif self.read_length < 100:
self.Q30 = 85

# Preliminary values for AVITI
elif self.instrument == "AVITI":
if self.read_length >= 150:
self.Q30 = 85
elif self.read_length >= 100:
self.Q30 = 90
elif self.read_length < 100:
self.Q30 = 95

if not self.Q30:
self.problem_handler(
"exit",
Expand Down Expand Up @@ -152,6 +163,10 @@ def set_exp_lane_clust(self):
self.exp_lane_clust = 400e6
elif self.chemistry == "NextSeq 2000 P3":
self.exp_lane_clust = 550e6
# Preliminary values for AVITI
elif self.instrument == "AVITI":
if self.chemistry == "AVITI High":
self.exp_lane_clust = 100e6
else:
self.problem_handler("exit", "Unknown run type!")
if not self.exp_lane_clust:
Expand Down

0 comments on commit accbef8

Please sign in to comment.