Skip to content

Commit

Permalink
Merge pull request #420 from chuan-wang/master
Browse files Browse the repository at this point in the history
Fix cases where the MiSeq samplesheet is missing index or index2
  • Loading branch information
chuan-wang authored Mar 26, 2024
2 parents 6d7d339 + b5c8feb commit dde2ba1
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 33 deletions.
8 changes: 8 additions & 0 deletions VERSIONLOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# TACA Version Log

## 20240321.1

Include project IDs in the run folder tarball

## 20240315.1

Fix cases where the MiSeq samplesheet is missing index or index2

## 20240304.1

- Make sure TACA can handle runs that generate NO sequencing data at all
Expand Down
12 changes: 6 additions & 6 deletions taca/analysis/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,8 @@ def _upload_to_statusdb(run):
parser = run.runParserObj
# Check if I have NoIndex lanes
for element in parser.obj["samplesheet_csv"]:
if (
"NoIndex" in element["index"] or not element["index"]
if "NoIndex" in element.get("index", "") or not element.get(
"index"
): # NoIndex in the case of HiSeq, empty in the case of HiSeqX
lane = element["Lane"] # This is a lane with NoIndex
# In this case PF Cluster is the number of undetermined reads
Expand Down Expand Up @@ -208,7 +208,7 @@ def transfer_runfolder(run_dir, pid, exclude_lane):

# Create a tar archive of the runfolder
dir_name = os.path.basename(run_dir)
archive = run_dir + ".tar.gz"
archive = run_dir + "_" + "_".join(pid_list) + ".tar.gz"
run_dir_path = os.path.dirname(run_dir)

# Prepare the options for excluding lanes
Expand Down Expand Up @@ -411,13 +411,13 @@ def _process(run):
)
else:
sbt = f"{run.id} Demultiplexing Completed!"
msg = """The run {run} has been demultiplexed without any error or warning.
msg = f"""The run {run.id} has been demultiplexed without any error or warning.
The Run will be transferred to the analysis cluster for further analysis.
The run is available at : https://genomics-status.scilifelab.se/flowcells/{run}
The run is available at : https://genomics-status.scilifelab.se/flowcells/{run.id}
""".format(run=run.id)
"""
run.send_mail(sbt, msg, rcp=CONFIG["mail"]["recipients"])

# Copy demultiplex stats file, InterOp meta data and run xml files to shared file system for LIMS purpose
Expand Down
10 changes: 2 additions & 8 deletions taca/cleanup/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,7 @@ def cleanup_miarka(
if all_undet_files:
undet_size = _def_get_size_unit(sum(map(os.path.getsize, all_undet_files)))
if misc.query_yes_no(
"In total found {} undetermined files which are {} in size, delete now ?".format(
len(all_undet_files), undet_size
),
f"In total found {len(all_undet_files)} undetermined files which are {undet_size} in size, delete now ?",
default="no",
):
_remove_files(all_undet_files)
Expand Down Expand Up @@ -313,11 +311,7 @@ def cleanup_miarka(
for proj, info in project_clean_list.items():
proj_count += 1
if not misc.query_yes_no(
"{}Delete files for this project ({}/{})".format(
get_proj_meta_info(info, days_fastq),
proj_count,
len(project_clean_list),
),
f"{get_proj_meta_info(info, days_fastq)}Delete files for this project ({proj_count}/{len(project_clean_list)})",
default="no",
):
logger.info(f"Will not remove files for project {proj}")
Expand Down
10 changes: 7 additions & 3 deletions taca/illumina/Runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,10 +350,10 @@ def transfer_run(self, t_file, mail_recipients=None):
# Send an email notifying that the transfer was successful
runname = self.id
sbt = f"Rsync of data for run {runname} to the analysis cluster has finished"
msg = """ Rsync of data for run {run} to the analysis cluster has finished!
msg = f""" Rsync of data for run {runname} to the analysis cluster has finished!
The run is available at : https://genomics-status.scilifelab.se/flowcells/{run}
""".format(run=runname)
The run is available at : https://genomics-status.scilifelab.se/flowcells/{runname}
"""
if mail_recipients:
send_mail(sbt, msg, mail_recipients)

Expand Down Expand Up @@ -453,6 +453,10 @@ def _classify_lanes(self, samplesheets):
# Prepare a list for lanes with NoIndex samples
noindex_lanes = []
for entry in self.runParserObj.samplesheet.data:
if not entry.get("index"):
entry["index"] = ""
if not entry.get("index2"):
entry["index2"] = ""
if entry["index"].upper() == "NOINDEX" or (
entry["index"] == "" and entry["index2"] == ""
):
Expand Down
4 changes: 4 additions & 0 deletions taca/illumina/Standard_Runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ def _classify_samples(self, indexfile, ssparser, runSetup):
sample_name = sample.get("Sample_Name") or sample.get("SampleName")
umi_length = [0, 0]
read_length = read_cycles
if not sample.get("index"):
sample["index"] = ""
if not sample.get("index2"):
sample["index2"] = ""
# Read the length of read 1 and read 2 from the field Recipe
if sample.get("Recipe") and RECIPE_PAT.findall(sample.get("Recipe")):
ss_read_length = [
Expand Down
18 changes: 2 additions & 16 deletions taca/utils/bioinfo_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,7 @@ def update_statusdb(run_dir):
for k, v in remote_doc.items():
obj["values"][k] = v
logger.info(
"Updating {} {} {} {} {} as {}".format(
run_id,
project,
flowcell,
lane,
sample,
sample_status,
)
f"Updating {run_id} {project} {flowcell} {lane} {sample} as {sample_status}"
)
# Sorts timestamps
obj["values"] = OrderedDict(
Expand All @@ -123,14 +116,7 @@ def update_statusdb(run_dir):
# Creates new entry
else:
logger.info(
"Creating {} {} {} {} {} as {}".format(
run_id,
project,
flowcell,
lane,
sample,
sample_status,
)
f"Creating {run_id} {project} {flowcell} {lane} {sample} as {sample_status}"
)
# Creates record
db.save(obj)
Expand Down

0 comments on commit dde2ba1

Please sign in to comment.