diff --git a/VERSIONLOG.md b/VERSIONLOG.md index 20c22f71..99349156 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,5 +1,9 @@ # Scilifelab_epps Version Log +## 20240130.1 + +Handle Anglerfish result parsing for runs W/O ONT barcodes + ## 20240126.1 Discover latest anglerfish run even if embedded in subdir of run dir diff --git a/scripts/parse_anglerfish_results.py b/scripts/parse_anglerfish_results.py index 758ec252..fcc9bed5 100644 --- a/scripts/parse_anglerfish_results.py +++ b/scripts/parse_anglerfish_results.py @@ -36,7 +36,7 @@ def find_latest_flowcell_run(currentStep: Process) -> str: def find_latest_anglerfish_run(latest_flowcell_run_path: str) -> str: anglerfish_query = f"{latest_flowcell_run_path}/**/anglerfish_run*" - anglerfish_glob = glob.glob(anglerfish_query) + anglerfish_glob = glob.glob(anglerfish_query, recursive=True) assert ( len(anglerfish_glob) != 0 @@ -88,7 +88,9 @@ def parse_data(df_raw: pd.DataFrame): # Sample reads divided by sum of all sample reads w. the same barcode lambda row: row["num_reads"] / df[df["ont_barcode"] == row["ont_barcode"]]["num_reads"].sum() - * 100, + * 100 + if not pd.isna(row["ont_barcode"]) + else None, axis=1, ) @@ -141,17 +143,24 @@ def fill_udfs(currentStep: Process, df: pd.DataFrame): for illumina_sample in illumina_samples: try: - barcode_name = ont_barcode_well2name( - fetch(illumina_sample, "ONT Barcode Well") + # Get ONT barcode well, if there is one + barcode_well = fetch( + illumina_sample, "ONT Barcode Well", on_fail=None ) - # Subset df to the current ONT barcode - df_barcode = df[df["ont_barcode"] == barcode_name] + if barcode_well: + barcode_name = ont_barcode_well2name(barcode_well) - # Further subset df to the current Illumina sample - df_sample = df_barcode[ - df_barcode["sample_name"] == illumina_sample.name - ] + # Subset df to the current ONT barcode + df_barcode = df[df["ont_barcode"] == barcode_name] + + # Subset df to the current Illumina sample + df_sample = df_barcode[ + df_barcode["sample_name"] == illumina_sample.name + ] + + else: + df_sample = df[df["sample_name"] == illumina_sample.name] assert ( len(df_sample) == 1