From 52664a9adbf38caea62a9221d5f47176d0807436 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 27 Aug 2024 11:04:17 +0200 Subject: [PATCH 1/6] always explicate lanes in [SAMPLES] section --- scripts/generate_aviti_run_manifest.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index b7c7e455..3d6dd2e2 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -68,15 +68,16 @@ def get_samples_section(process: Process) -> str: # Get the analytes placed into the flowcell arts_out = [op for op in process.all_outputs() if op.type == "Analyte"] + lanes = [art_out.location[1].split(":")[1] for art_out in arts_out] - # Check whether lanes are individually addressable - lanes_used = set([art_out.location[1].split(":")[1] for art_out in arts_out]) - ungrouped_lanes = True if len(lanes_used) == 2 else False - logging.info(f"Individually addressable lanes: {ungrouped_lanes}") + # If only a single pool is added to the LIMS container, treat it as though it was loaded into both lanes + if len(lanes) == 1: + lanes.append("2" if lanes[0] == "1" else "1") + arts_out.append(arts_out[0]) # Iterate over pools all_rows = [] - for art_out in arts_out: + for art_out, lane in zip(arts_out, lanes): lane_rows = [] assert ( "AVITI Flow Cell" in art_out.container.type.name @@ -88,7 +89,6 @@ def get_samples_section(process: Process) -> str: art_out.reagent_labels ), "Unequal number of samples and reagent labels." - lane: str = art_out.location[1].split(":")[1] sample2label: dict[str, str] = get_pool_sample_label_mapping(art_out) samples = art_out.samples labels = art_out.reagent_labels @@ -116,8 +116,7 @@ def get_samples_section(process: Process) -> str: row["SampleName"] = sample.name row["Index1"] = index1 row["Index2"] = index2 - if ungrouped_lanes: - row["Lane"] = lane + row["Lane"] = lane lane_rows.append(row) @@ -133,8 +132,7 @@ def get_samples_section(process: Process) -> str: row["SampleName"] = "PhiX" row["Index1"] = phix_idx_pair[0] row["Index2"] = phix_idx_pair[1] - if ungrouped_lanes: - row["Lane"] = lane + row["Lane"] = lane lane_rows.append(row) # Check for index collision within lane, across samples and PhiX From 5b0ca7b022e265f5aadf23f0c1321986658fd707 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 27 Aug 2024 11:09:23 +0200 Subject: [PATCH 2/6] sort samples section by name and lane --- scripts/generate_aviti_run_manifest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 3d6dd2e2..16991ae5 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -140,6 +140,7 @@ def get_samples_section(process: Process) -> str: all_rows.extend(lane_rows) df = pd.DataFrame(all_rows) + df.sort_values(by=["SampleName", "Lane"], inplace=True) samples_section = f"[SAMPLES]\n{df.to_csv(index=None, header=True)}" From 5ac1299c8d167ad4498be14fbc617477f9b48bc0 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 27 Aug 2024 11:12:36 +0200 Subject: [PATCH 3/6] Revert "sort samples section by name and lane" This reverts commit 5b0ca7b022e265f5aadf23f0c1321986658fd707. --- scripts/generate_aviti_run_manifest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 16991ae5..3d6dd2e2 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -140,7 +140,6 @@ def get_samples_section(process: Process) -> str: all_rows.extend(lane_rows) df = pd.DataFrame(all_rows) - df.sort_values(by=["SampleName", "Lane"], inplace=True) samples_section = f"[SAMPLES]\n{df.to_csv(index=None, header=True)}" From 0ecb673aded3c708693a604121cbef902d9498bc Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 27 Aug 2024 13:35:43 +0200 Subject: [PATCH 4/6] adapt to 2-lane container --- scripts/generate_aviti_run_manifest.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 3d6dd2e2..827f6f5d 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -66,14 +66,11 @@ def get_samples_section(process: Process) -> str: phix_loaded: bool = process.udf["PhiX Loaded"] - # Get the analytes placed into the flowcell + # Assert two output analytes placed in either flowcell lane arts_out = [op for op in process.all_outputs() if op.type == "Analyte"] + assert len(arts_out) == 2, "Expected two output analytes." lanes = [art_out.location[1].split(":")[1] for art_out in arts_out] - - # If only a single pool is added to the LIMS container, treat it as though it was loaded into both lanes - if len(lanes) == 1: - lanes.append("2" if lanes[0] == "1" else "1") - arts_out.append(arts_out[0]) + assert set(lanes) == {"1", "2"}, "Expected lanes 1 and 2." # Iterate over pools all_rows = [] From 0cbddfc60c54a6f85b6e5b2e6483a12c1394f275 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 27 Aug 2024 13:45:37 +0200 Subject: [PATCH 5/6] Include flowcell ID, rename sanitation func and trim redundant metadata --- scripts/generate_aviti_run_manifest.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 827f6f5d..eee58152 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -21,6 +21,16 @@ LABEL_SEQ_SUBSTRING = re.compile(r"[ACGT]{4,}(-[ACGT]{4,})?") +def get_flowcell_id(process: Process) -> str: + flowcell_ids = [ + op.container.name for op in process.all_outputs() if op.type == "Analyte" + ] + + assert len(set(flowcell_ids)) == 1, "Expected one flowcell ID." + + return flowcell_ids[0] + + def get_runValues_section(process: Process, file_name: str) -> str: """Generate the [RUNVALUES] section of the AVITI run manifest and return it as a string.""" @@ -37,11 +47,8 @@ def get_runValues_section(process: Process, file_name: str) -> str: [ "[RUNVALUES]", "KeyName, Value", - f"lims_step_name, {safe_string(process.type.name)}", - f"lims_step_id, {process.id}", - f"lims_step_operator, {process.technician.name}", - f"file_name, {safe_string(file_name)}", - f"file_timestamp, {TIMESTAMP}", + f"lims_step_name, {sanitize(process.type.name)}", + f"file_name, {sanitize(file_name)}", f"read_recipe, {read_recipe}", ] ) @@ -244,7 +251,7 @@ def show_match(seq1: str, seq2: str) -> str: return lines -def safe_string(s: str) -> str: +def sanitize(s: str) -> str: """Wrap a string in quotes if it contains commas.""" if "," in s: return f'"{s}"' @@ -257,7 +264,9 @@ def main(args: Namespace): lims = Lims(BASEURI, USERNAME, PASSWORD) process = Process(lims, id=args.pid) - file_name = f"AVITI_run_manifest_{process.id}_{TIMESTAMP}_{process.technician.name.replace(' ','')}.csv" + # Name manifest file + flowcell_id = get_flowcell_id(process) + file_name = f"AVITI_run_manifest_{flowcell_id}_{process.id}_{TIMESTAMP}_{process.technician.name.replace(' ','')}.csv" # Build manifest logging.info("Starting to build run manifest.") From 20f6dbd1e02a5b823f0849e851d16c4904699f2a Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 27 Aug 2024 13:53:33 +0200 Subject: [PATCH 6/6] add warning --- scripts/generate_aviti_run_manifest.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index eee58152..2cae17f3 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -27,8 +27,14 @@ def get_flowcell_id(process: Process) -> str: ] assert len(set(flowcell_ids)) == 1, "Expected one flowcell ID." + flowcell_id = flowcell_ids[0] - return flowcell_ids[0] + if "-" in flowcell_id: + logging.warning( + f"Container name {flowcell_id} contains a dash, did you forget to set the name of the LIMS container to the flowcell ID?" + ) + + return flowcell_id def get_runValues_section(process: Process, file_name: str) -> str: