From 15d58d5ba412278822e1dffe97a6dc399c7c8d0a Mon Sep 17 00:00:00 2001 From: Peter Briggs Date: Mon, 4 Nov 2024 13:04:24 +0000 Subject: [PATCH] 'report' command: update reporting of multiplexed samples to handle multiple configs. --- auto_process_ngs/commands/report_cmd.py | 20 +++--- .../test/commands/test_report_cmd.py | 65 +++++++++++++++++++ 2 files changed, 75 insertions(+), 10 deletions(-) diff --git a/auto_process_ngs/commands/report_cmd.py b/auto_process_ngs/commands/report_cmd.py index a854b23c6..a224a869b 100644 --- a/auto_process_ngs/commands/report_cmd.py +++ b/auto_process_ngs/commands/report_cmd.py @@ -741,16 +741,16 @@ def get_multiplexed_samples(project): if project.info.library_type in ("CellPlex", "CellPlex scRNA-seq", "Flex"): - # Fetch implicit multiplexed sample info from config - try: - multi_config = CellrangerMultiConfigCsv( - os.path.join(project.dirn,"10x_multi_config.csv")) - # Return multiplexed sample names - return multi_config.sample_names - except FileNotFoundError: - # Multiplexed samples expected but can't be identified - # Return empty list - return [] + # Fetch implicit multiplexed sample info from configs + multiplexed_samples = [] + for f in os.listdir(project.dirn): + if f.startswith("10x_multi_config.") and \ + f.endswith(".csv"): + multi_config = CellrangerMultiConfigCsv( + os.path.join(project.dirn, f)) + multiplexed_samples.extend(multi_config.sample_names) + # Return multiplexed sample names + return sorted(multiplexed_samples) else: # No multiplexed samples expected # Return None diff --git a/auto_process_ngs/test/commands/test_report_cmd.py b/auto_process_ngs/test/commands/test_report_cmd.py index b5ef86b7e..7b0299816 100644 --- a/auto_process_ngs/test/commands/test_report_cmd.py +++ b/auto_process_ngs/test/commands/test_report_cmd.py @@ -2508,6 +2508,71 @@ def test_get_multiplexed_samples_10x_multi_config(self): ABM2,CMO302,ABM2 ABM3,CMO303,ABM3 ABM4,CMO304,ABM4 +""" % (fastq_dir,fastq_dir)) + project = AnalysisProject(os.path.join(mockdir.dirn,'AB')) + self.assertEqual(get_multiplexed_samples(project), + ["ABM1", "ABM2", "ABM3", "ABM4"]) + + def test_get_multiplexed_samples_multiple_10x_multi_configs(self): + """ + report: test 'get_multiplexed_samples' (multiple 10x multi configs) + """ + mockdir = MockAnalysisDirFactory.bcl2fastq2( + '170901_M00879_0087_000000000-AGEW9', + 'miseq', + metadata={ "source": "testing", + "run_number": 87, + "sequencer_model": "MiSeq", + "analysis_number": 2 }, + project_metadata={ + "AB": { "User": "Alison Bell", + "Library type": "CellPlex scRNA-seq", + "Organism": "Human", + "PI": "Audrey Bower", + "Single cell platform": "10xGenomics Chromium 3'v3", + "Number of cells": None, + "Sequencer model": "MiSeq" } + }, + top_dir=self.dirn) + mockdir.create() + # Add cellranger multi config.csv files + with open(os.path.join(mockdir.dirn, + "AB", + "10x_multi_config.AB1.csv"),'wt') as fp: + fastq_dir = os.path.join(mockdir.dirn, + "AB", + "fastqs") + fp.write("""[gene-expression] +reference,/data/refdata-cellranger-gex-GRCh38-2020-A + +[libraries] +fastq_id,fastqs,lanes,physical_library_id,feature_types,subsample_rate +AB1,%s,any,AB1,gene expression, +AB1,%s,any,AB1,Multiplexing Capture, + +[samples] +sample_id,cmo_ids,description +ABM1,CMO301,ABM1 +ABM2,CMO302,ABM2 +""" % (fastq_dir,fastq_dir)) + with open(os.path.join(mockdir.dirn, + "AB", + "10x_multi_config.AB2.csv"),'wt') as fp: + fastq_dir = os.path.join(mockdir.dirn, + "AB", + "fastqs") + fp.write("""[gene-expression] +reference,/data/refdata-cellranger-gex-GRCh38-2020-A + +[libraries] +fastq_id,fastqs,lanes,physical_library_id,feature_types,subsample_rate +AB2,%s,any,AB2,gene expression, +AB2,%s,any,AB2,Multiplexing Capture, + +[samples] +sample_id,cmo_ids,description +ABM3,CMO303,ABM3 +ABM4,CMO304,ABM4 """ % (fastq_dir,fastq_dir)) project = AnalysisProject(os.path.join(mockdir.dirn,'AB')) self.assertEqual(get_multiplexed_samples(project),