From b6db5cdfddfa452ff24706f4a4e2820b83792d19 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 30 Jan 2024 16:34:00 +0100 Subject: [PATCH 1/3] properly handle cases w/o ONT barcodes --- scripts/parse_anglerfish_results.py | 32 ++++++++++++++++++----------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/scripts/parse_anglerfish_results.py b/scripts/parse_anglerfish_results.py index 758ec252..53045749 100644 --- a/scripts/parse_anglerfish_results.py +++ b/scripts/parse_anglerfish_results.py @@ -36,7 +36,7 @@ def find_latest_flowcell_run(currentStep: Process) -> str: def find_latest_anglerfish_run(latest_flowcell_run_path: str) -> str: anglerfish_query = f"{latest_flowcell_run_path}/**/anglerfish_run*" - anglerfish_glob = glob.glob(anglerfish_query) + anglerfish_glob = glob.glob(anglerfish_query, recursive=True) assert ( len(anglerfish_glob) != 0 @@ -88,7 +88,9 @@ def parse_data(df_raw: pd.DataFrame): # Sample reads divided by sum of all sample reads w. the same barcode lambda row: row["num_reads"] / df[df["ont_barcode"] == row["ont_barcode"]]["num_reads"].sum() - * 100, + * 100 + if not pd.isna(row["ont_barcode"]) + else None, axis=1, ) @@ -141,17 +143,23 @@ def fill_udfs(currentStep: Process, df: pd.DataFrame): for illumina_sample in illumina_samples: try: - barcode_name = ont_barcode_well2name( - fetch(illumina_sample, "ONT Barcode Well") + # Get ONT barcode well, if there is one + barcode_well = fetch( + illumina_sample, "ONT Barcode Well", on_fail=None ) - - # Subset df to the current ONT barcode - df_barcode = df[df["ont_barcode"] == barcode_name] - - # Further subset df to the current Illumina sample - df_sample = df_barcode[ - df_barcode["sample_name"] == illumina_sample.name - ] + if barcode_well: + barcode_name = ont_barcode_well2name(barcode_well) + + # Subset df to the current ONT barcode + df_barcode = df[df["ont_barcode"] == barcode_name] + + # Subset df to the current Illumina sample + if barcode_well: + df_sample = df_barcode[ + 
df_barcode["sample_name"] == illumina_sample.name + ] + else: + df_sample = df[df["sample_name"] == illumina_sample.name] assert ( len(df_sample) == 1 From f04329dde2eb89f9315d0d0cd9a0aaee3d0cdc32 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 30 Jan 2024 16:34:30 +0100 Subject: [PATCH 2/3] bump vlog --- VERSIONLOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/VERSIONLOG.md b/VERSIONLOG.md index 20c22f71..99349156 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,5 +1,9 @@ # Scilifelab_epps Version Log +## 20240130.1 + +Handle Anglerfish result parsing for runs without ONT barcodes + ## 20240126.1 Discover latest anglerfish run even if embedded in subdir of run dir From 380ab3885798dd6b397c1a317d010ef4cbaa06d7 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Tue, 30 Jan 2024 16:36:17 +0100 Subject: [PATCH 3/3] simplify logic --- scripts/parse_anglerfish_results.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/parse_anglerfish_results.py b/scripts/parse_anglerfish_results.py index 53045749..fcc9bed5 100644 --- a/scripts/parse_anglerfish_results.py +++ b/scripts/parse_anglerfish_results.py @@ -147,17 +147,18 @@ def fill_udfs(currentStep: Process, df: pd.DataFrame): barcode_well = fetch( illumina_sample, "ONT Barcode Well", on_fail=None ) + if barcode_well: barcode_name = ont_barcode_well2name(barcode_well) # Subset df to the current ONT barcode df_barcode = df[df["ont_barcode"] == barcode_name] - # Subset df to the current Illumina sample - if barcode_well: + # Subset df to the current Illumina sample df_sample = df_barcode[ df_barcode["sample_name"] == illumina_sample.name ] + else: df_sample = df[df["sample_name"] == illumina_sample.name]