From df992acd73df8906ae681d9624f1819a7415206f Mon Sep 17 00:00:00 2001 From: chuan-wang Date: Fri, 15 Mar 2024 15:12:24 +0100 Subject: [PATCH] Fix cases where the MiSeq samplesheet is missing index or index2 --- VERSIONLOG.md | 4 ++++ taca/analysis/analysis.py | 2 +- taca/illumina/Runs.py | 4 ++++ taca/illumina/Standard_Runs.py | 4 ++++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/VERSIONLOG.md b/VERSIONLOG.md index bca6360e..4d464718 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,5 +1,9 @@ # TACA Version Log +## 20240315.1 + +Fix cases where the MiSeq samplesheet is missing index or index2 + ## 20240304.1 - Make sure TACA can handle runs that generate NO sequencing data at all diff --git a/taca/analysis/analysis.py b/taca/analysis/analysis.py index f9418f46..ffacfb27 100755 --- a/taca/analysis/analysis.py +++ b/taca/analysis/analysis.py @@ -115,7 +115,7 @@ def _upload_to_statusdb(run): # Check if I have NoIndex lanes for element in parser.obj["samplesheet_csv"]: if ( - "NoIndex" in element["index"] or not element["index"] + "NoIndex" in element.get("index", "") or not element.get("index") ): # NoIndex in the case of HiSeq, empty in the case of HiSeqX lane = element["Lane"] # This is a lane with NoIndex # In this case PF Cluster is the number of undetermined reads diff --git a/taca/illumina/Runs.py b/taca/illumina/Runs.py index 2bd33e3c..876ed1eb 100644 --- a/taca/illumina/Runs.py +++ b/taca/illumina/Runs.py @@ -453,6 +453,10 @@ def _classify_lanes(self, samplesheets): # Prepare a list for lanes with NoIndex samples noindex_lanes = [] for entry in self.runParserObj.samplesheet.data: + if not entry.get("index"): + entry["index"] = "" + if not entry.get("index2"): + entry["index2"] = "" if entry["index"].upper() == "NOINDEX" or ( entry["index"] == "" and entry["index2"] == "" ): diff --git a/taca/illumina/Standard_Runs.py b/taca/illumina/Standard_Runs.py index a7f9a2d9..08d6fdbc 100755 --- a/taca/illumina/Standard_Runs.py +++ b/taca/illumina/Standard_Runs.py @@ 
-131,6 +131,10 @@ def _classify_samples(self, indexfile, ssparser, runSetup): sample_name = sample.get("Sample_Name") or sample.get("SampleName") umi_length = [0, 0] read_length = read_cycles + if not sample.get("index"): + sample["index"] = "" + if not sample.get("index2"): + sample["index2"] = "" # Read the length of read 1 and read 2 from the field Recipe if sample.get("Recipe") and RECIPE_PAT.findall(sample.get("Recipe")): ss_read_length = [