From ca02d78896097dbb8a005dde9eefc82104833374 Mon Sep 17 00:00:00 2001 From: Peter Briggs Date: Mon, 4 Nov 2024 11:39:21 +0000 Subject: [PATCH 1/4] 'report' command: handle "Cellplex scRNA-seq" when reporting multiplexed samples. --- auto_process_ngs/commands/report_cmd.py | 4 + .../test/commands/test_report_cmd.py | 291 ++++++++++++++++++ 2 files changed, 295 insertions(+) diff --git a/auto_process_ngs/commands/report_cmd.py b/auto_process_ngs/commands/report_cmd.py index b4ae75968..eb61717f5 100644 --- a/auto_process_ngs/commands/report_cmd.py +++ b/auto_process_ngs/commands/report_cmd.py @@ -165,6 +165,7 @@ def report_info(ap): info.multiplexed_samples.split(',')), project.prettyPrintSamples()) elif project.info.library_type in ("CellPlex", + "CellPlex scRNA-seq", "Flex"): # Fetch implicit multiplexed sample info from config try: @@ -233,6 +234,7 @@ def report_concise(ap): split(',')) has_multiplexed_samples = True elif p.info.library_type in ("CellPlex", + "CellPlex scRNA-seq", "Flex"): # Fetch implicit multiplexed sample info from config try: @@ -465,6 +467,7 @@ def report_summary(ap): split(',')) has_multiplexed_samples = True elif project.info.library_type in ("CellPlex", + "CellPlex scRNA-seq", "Flex"): # Fetch implicit multiplexed sample info from config try: @@ -649,6 +652,7 @@ def fetch_value(ap,project,field): # 10x CellPlex and Flex data multi_config = None if project.info.library_type in ("CellPlex", + "CellPlex scRNA-seq", "Flex"): try: multi_config = CellrangerMultiConfigCsv( diff --git a/auto_process_ngs/test/commands/test_report_cmd.py b/auto_process_ngs/test/commands/test_report_cmd.py index e8735326b..8dd6af03f 100644 --- a/auto_process_ngs/test/commands/test_report_cmd.py +++ b/auto_process_ngs/test/commands/test_report_cmd.py @@ -327,6 +327,112 @@ def test_report_info_10x_cellplex(self): QC : not verified Comments: None +- undetermined + ------------ + User : None + PI : None + Library : None + SC Plat.: None + Organism: None + Dir : 
undetermined + #samples: 1 + #cells : + Samples : Undetermined + QC : not verified + Comments: None""" % mockdir.dirn + for o,e in zip(report_info(ap).split('\n'), + expected.split('\n')): + self.assertEqual(o,e) + + def test_report_info_10x_cellplex_scrnaseq(self): + """report: report 10xGenomics CellPlex scRNA-seq run in 'info' mode + """ + # Make a mock auto-process directory + mockdir = MockAnalysisDirFactory.bcl2fastq2( + '170901_M00879_0087_000000000-AGEW9', + 'miseq', + metadata={ "source": "testing", + "run_number": 87, }, + project_metadata={ + "AB": { "User": "Alison Bell", + "Library type": "CellPlex scRNA-seq", + "Organism": "Human", + "PI": "Audrey Bower", + "Single cell platform": "10xGenomics Chromium 3'v3", + "Number of cells": 1311 + }, + "CDE": { "User": "Charles David Edwards", + "Library type": "ChIP-seq", + "Organism": "Mouse", + "PI": "Colin Delaney Eccleston" } + }, + top_dir=self.dirn) + mockdir.create() + # Add a cellranger multi config.csv file + with open(os.path.join(mockdir.dirn, + "AB", + "10x_multi_config.csv"),'wt') as fp: + fastq_dir = os.path.join(mockdir.dirn, + "AB", + "fastqs") + fp.write("""[gene-expression] +reference,/data/refdata-cellranger-gex-GRCh38-2020-A + +[libraries] +fastq_id,fastqs,lanes,physical_library_id,feature_types,subsample_rate +AB1,%s,any,AB1,gene expression, +AB2,%s,any,AB2,Multiplexing Capture, + +[samples] +sample_id,cmo_ids,description +ABM1,CMO301,ABM1 +ABM2,CMO302,ABM2 +ABM3,CMO303,ABM3 +ABM4,CMO304,ABM4 +""" % (fastq_dir,fastq_dir)) + # Make autoprocess instance + ap = AutoProcess(analysis_dir=mockdir.dirn) + # Generate info report + expected = """Run ID : MISEQ_170901#87 +Directory : %s +Platform : miseq +Unaligned dir: bcl2fastq + +Summary of data in 'bcl2fastq' dir: + +- AB: AB1-2 (2 paired end samples) +- CDE: CDE3-4 (2 paired end samples) + +3 analysis projects: + +- AB + -- + User : Alison Bell + PI : Audrey Bower + Library : CellPlex scRNA-seq + SC Plat.: 10xGenomics Chromium 3'v3 + 
Organism: Human + Dir : AB + #samples: 4 multiplexed (2 physical) + #cells : 1311 + Samples : ABM1-4 (AB1-2) + QC : not verified + Comments: None + +- CDE + --- + User : Charles David Edwards + PI : Colin Delaney Eccleston + Library : ChIP-seq + SC Plat.: None + Organism: Mouse + Dir : CDE + #samples: 2 + #cells : + Samples : CDE3-4 + QC : not verified + Comments: None + - undetermined ------------ User : None @@ -807,6 +913,57 @@ def test_report_concise_10x_cellplex(self): self.assertEqual(report_concise(ap), "Paired end: 'AB': Alison Bell, Human 10xGenomics Chromium 3'v3 CellPlex (PI: Audrey Bower) (4 multiplexed samples/1311 cells); 'CDE': Charles David Edwards, Mouse ChIP-seq (PI: Colin Delaney Eccleston) (2 samples)") + def test_report_concise_10x_cellplex_scrnaseq(self): + """report: report 10xGenomics CellPlex scRNA-seq run in 'concise' mode + """ + # Make a mock auto-process directory + mockdir = MockAnalysisDirFactory.bcl2fastq2( + '170901_M00879_0087_000000000-AGEW9', + 'miseq', + metadata={ "source": "testing", + "run_number": 87, }, + project_metadata={ + "AB": { "User": "Alison Bell", + "Library type": "CellPlex scRNA-seq", + "Organism": "Human", + "PI": "Audrey Bower", + "Single cell platform": "10xGenomics Chromium 3'v3", + "Number of cells": 1311 }, + "CDE": { "User": "Charles David Edwards", + "Library type": "ChIP-seq", + "Organism": "Mouse", + "PI": "Colin Delaney Eccleston" } + }, + top_dir=self.dirn) + mockdir.create() + # Add a cellranger multi config.csv file + with open(os.path.join(mockdir.dirn, + "AB", + "10x_multi_config.csv"),'wt') as fp: + fastq_dir = os.path.join(mockdir.dirn, + "AB", + "fastqs") + fp.write("""[gene-expression] +reference,/data/refdata-cellranger-gex-GRCh38-2020-A + +[libraries] +fastq_id,fastqs,lanes,physical_library_id,feature_types,subsample_rate +AB1,%s,any,AB1,gene expression, +AB2,%s,any,AB2,Multiplexing Capture, + +[samples] +sample_id,cmo_ids,description +ABM1,CMO301,ABM1 +ABM2,CMO302,ABM2 +ABM3,CMO303,ABM3 
+ABM4,CMO304,ABM4 +""" % (fastq_dir,fastq_dir)) + # Make autoprocess instance + ap = AutoProcess(analysis_dir=mockdir.dirn) + # Generate concise report + self.assertEqual(report_concise(ap), + "Paired end: 'AB': Alison Bell, Human 10xGenomics Chromium 3'v3 CellPlex scRNA-seq (PI: Audrey Bower) (4 multiplexed samples/1311 cells); 'CDE': Charles David Edwards, Mouse ChIP-seq (PI: Colin Delaney Eccleston) (2 samples)") + def test_report_concise_10x_flex(self): """report: report 10xGenomics Flex run in 'concise' mode """ @@ -1184,6 +1341,82 @@ def test_report_summary_10x_cellplex(self): - 'AB': Alison Bell Human CellPlex (10xGenomics Chromium 3'v3) 4 multiplexed samples/1311 cells (PI Audrey Bower) - 'CDE': Charles David Edwards Mouse ChIP-seq 2 samples (PI Colin Delaney Eccleston) +Additional notes/comments: +- CDE: Repeat of previous run +""" % mockdir.dirn + for o,e in zip(report_summary(ap).split('\n'), + expected.split('\n')): + self.assertEqual(o,e) + + def test_report_summary_10x_cellplex_scrnaseq(self): + """report: report 10xGenomics CellPlex scRNA-seq run in 'summary' mode + """ + # Make a mock auto-process directory + mockdir = MockAnalysisDirFactory.bcl2fastq2( + '170901_M00879_0087_000000000-AGEW9', + 'miseq', + metadata={ "source": "testing", + "run_number": 87, + "bcl2fastq_software": + "('/usr/bin/bcl2fastq', 'bcl2fastq', '2.17.1.14')", + "cellranger_software": + "('/usr/bin/cellranger', 'cellranger', '3.0.1')", + "sequencer_model": "MiSeq" }, + project_metadata={ + "AB": { "User": "Alison Bell", + "Library type": "CellPlex scRNA-seq", + "Organism": "Human", + "PI": "Audrey Bower", + "Single cell platform": "10xGenomics Chromium 3'v3", + "Number of cells": 1311 }, + "CDE": { "User": "Charles David Edwards", + "Library type": "ChIP-seq", + "Organism": "Mouse", + "PI": "Colin Delaney Eccleston", + "Comments": "Repeat of previous run" } + }, + top_dir=self.dirn) + mockdir.create() + # Add a cellranger multi config.csv file + with 
open(os.path.join(mockdir.dirn, + "AB", + "10x_multi_config.csv"),'wt') as fp: + fastq_dir = os.path.join(mockdir.dirn, + "AB", + "fastqs") + fp.write("""[gene-expression] +reference,/data/refdata-cellranger-gex-GRCh38-2020-A + +[libraries] +fastq_id,fastqs,lanes,physical_library_id,feature_types,subsample_rate +AB1,%s,any,AB1,gene expression, +AB2,%s,any,AB2,Multiplexing Capture, + +[samples] +sample_id,cmo_ids,description +ABM1,CMO301,ABM1 +ABM2,CMO302,ABM2 +ABM3,CMO303,ABM3 +ABM4,CMO304,ABM4 +""" % (fastq_dir,fastq_dir)) + # Make autoprocess instance + ap = AutoProcess(analysis_dir=mockdir.dirn) + # Generate summary report + expected = """MISEQ run #87 datestamped 170901 +================================ +Run name : 170901_M00879_0087_000000000-AGEW9 +Reference : MISEQ_170901#87 +Platform : MISEQ +Sequencer : MiSeq +Directory : %s +Endedness : Paired end +Bcl2fastq : bcl2fastq 2.17.1.14 +Cellranger: cellranger 3.0.1 + +2 projects: +- 'AB': Alison Bell Human CellPlex scRNA-seq (10xGenomics Chromium 3'v3) 4 multiplexed samples/1311 cells (PI Audrey Bower) +- 'CDE': Charles David Edwards Mouse ChIP-seq 2 samples (PI Colin Delaney Eccleston) + Additional notes/comments: - CDE: Repeat of previous run """ % mockdir.dirn @@ -1739,6 +1972,64 @@ def test_report_projects_10x_cellplex(self): # Generate projects report expected = """MISEQ_170901#87\t87\ttesting\t\tAlison Bell\tAudrey Bower\tCellPlex\t10xGenomics Chromium 3'v3\tHuman\tMISEQ\t4\t1311\tyes\tABM1-4 MISEQ_170901#87\t87\ttesting\t\tCharles David Edwards\tColin Delaney Eccleston\tChIP-seq\t\tMouse\tMISEQ\t2\t\tyes\tCDE3-4 +""" + for o,e in zip(report_projects(ap).split('\n'), + expected.split('\n')): + self.assertEqual(o,e) + + def test_report_projects_10x_cellplex_scrnaseq(self): + """report: report 10xGenomics CellPlex scRNA-seq run in 'projects' mode + """ + # Make a mock auto-process directory + mockdir = MockAnalysisDirFactory.bcl2fastq2( + '170901_M00879_0087_000000000-AGEW9', + 'miseq', + metadata={ 
"source": "testing", + "run_number": 87, + "sequencer_model": "MiSeq" }, + project_metadata={ + "AB": { "User": "Alison Bell", + "Library type": "CellPlex scRNA-seq", + "Organism": "Human", + "PI": "Audrey Bower", + "Single cell platform": "10xGenomics Chromium 3'v3", + "Number of cells": 1311, + "Sequencer model": "MiSeq" }, + "CDE": { "User": "Charles David Edwards", + "Library type": "ChIP-seq", + "Organism": "Mouse", + "PI": "Colin Delaney Eccleston", + "Sequencer model": "MiSeq" } + }, + top_dir=self.dirn) + mockdir.create() + # Add a cellranger multi config.csv file + with open(os.path.join(mockdir.dirn, + "AB", + "10x_multi_config.csv"),'wt') as fp: + fastq_dir = os.path.join(mockdir.dirn, + "AB", + "fastqs") + fp.write("""[gene-expression] +reference,/data/refdata-cellranger-gex-GRCh38-2020-A + +[libraries] +fastq_id,fastqs,lanes,physical_library_id,feature_types,subsample_rate +AB1,%s,any,AB1,gene expression, +AB2,%s,any,AB2,Multiplexing Capture, + +[samples] +sample_id,cmo_ids,description +ABM1,CMO301,ABM1 +ABM2,CMO302,ABM2 +ABM3,CMO303,ABM3 +ABM4,CMO304,ABM4 +""" % (fastq_dir,fastq_dir)) + # Make autoprocess instance and set required metadata + ap = AutoProcess(analysis_dir=mockdir.dirn) + # Generate projects report + expected = """MISEQ_170901#87\t87\ttesting\t\tAlison Bell\tAudrey Bower\tCellPlex scRNA-seq\t10xGenomics Chromium 3'v3\tHuman\tMISEQ\t4\t1311\tyes\tABM1-4 +MISEQ_170901#87\t87\ttesting\t\tCharles David Edwards\tColin Delaney Eccleston\tChIP-seq\t\tMouse\tMISEQ\t2\t\tyes\tCDE3-4 """ for o,e in zip(report_projects(ap).split('\n'), expected.split('\n')): From 0a28bf1152e1c338f89d7bfaf7c476478c2d77b7 Mon Sep 17 00:00:00 2001 From: Peter Briggs Date: Mon, 4 Nov 2024 12:38:21 +0000 Subject: [PATCH 2/4] 'report' command: new helper function 'get_multiplexed_samples'. 
--- auto_process_ngs/commands/report_cmd.py | 32 +++++ .../test/commands/test_report_cmd.py | 123 ++++++++++++++++++ 2 files changed, 155 insertions(+) diff --git a/auto_process_ngs/commands/report_cmd.py b/auto_process_ngs/commands/report_cmd.py index eb61717f5..60907a4ec 100644 --- a/auto_process_ngs/commands/report_cmd.py +++ b/auto_process_ngs/commands/report_cmd.py @@ -745,3 +745,35 @@ def default_value(s,default=""): if s is None: return default return s + +def get_multiplexed_samples(project): + """ + Return the names of implicit multiplexed samples in a project + + Arguments: + project (AnalysisProject): project to get implicit multiplexed + samples for + + Returns: + List: list of multiplexed sample names, or empty list if + project should have multiplexed samples but they cannot be + identified. Returns None if project is not of a type to + have multiplexed samples. + """ + if project.info.library_type in ("CellPlex", + "CellPlex scRNA-seq", + "Flex"): + # Fetch implicit multiplexed sample info from config + try: + multi_config = CellrangerMultiConfigCsv( + os.path.join(project.dirn,"10x_multi_config.csv")) + # Return multiplexed sample names + return multi_config.sample_names + except FileNotFoundError: + # Multiplexed samples expected but can't be identified + # Return empty list + return [] + else: + # No multiplexed samples expected + # Return None + return None diff --git a/auto_process_ngs/test/commands/test_report_cmd.py b/auto_process_ngs/test/commands/test_report_cmd.py index 8dd6af03f..b5ef86b7e 100644 --- a/auto_process_ngs/test/commands/test_report_cmd.py +++ b/auto_process_ngs/test/commands/test_report_cmd.py @@ -17,6 +17,7 @@ from auto_process_ngs.commands.report_cmd import report_projects from auto_process_ngs.commands.report_cmd import fetch_value from auto_process_ngs.commands.report_cmd import default_value +from auto_process_ngs.commands.report_cmd import get_multiplexed_samples # Unit tests @@ -2389,3 +2390,125 @@ def 
test_default_value(self): self.assertEqual("",default_value(None)) self.assertEqual("Goodbye",default_value(None,default="Goodbye")) self.assertEqual(0,default_value(0)) + +class TestGetMultiplexedSamplesFunction(unittest.TestCase): + """ + Tests for the 'get_multiplexed_samples' function + """ + def setUp(self): + # Create a temp working dir + self.dirn = tempfile.mkdtemp(suffix='TestGetMultiplexedSamples') + # Store original location so we can get back at the end + self.pwd = os.getcwd() + # Move to working dir + os.chdir(self.dirn) + + def tearDown(self): + # Return to original dir + os.chdir(self.pwd) + # Remove the temporary test directory + def del_rw(action,name,excinfo): + # Explicitly remove read only files/ + # dirs + os.chmod(os.path.dirname(name),0o755) + os.chmod(name,0o655) + os.remove(name) + shutil.rmtree(self.dirn,onerror=del_rw) + + def test_get_multiplexed_samples_no_samples(self): + """ + report: test 'get_multiplexed_samples' (no multiplexed samples) + """ + mockdir = MockAnalysisDirFactory.bcl2fastq2( + '170901_M00879_0087_000000000-AGEW9', + 'miseq', + metadata={ "source": "testing", + "run_number": 87, + "sequencer_model": "MiSeq", + "analysis_number": 2 }, + project_metadata={ + "AB": { "User": "Alison Bell", + "Library type": "scRNA-seq", + "Organism": "Human", + "PI": "Audrey Bower", + "Single cell platform": None, + "Number of cells": None, + "Sequencer model": "MiSeq" } + }, + top_dir=self.dirn) + mockdir.create() + project = AnalysisProject(os.path.join(mockdir.dirn,'AB')) + self.assertEqual(get_multiplexed_samples(project), None) + + def test_get_multiplexed_samples_missing_samples(self): + """ + report: test 'get_multiplexed_samples' (missing multiplexed samples) + """ + mockdir = MockAnalysisDirFactory.bcl2fastq2( + '170901_M00879_0087_000000000-AGEW9', + 'miseq', + metadata={ "source": "testing", + "run_number": 87, + "sequencer_model": "MiSeq", + "analysis_number": 2 }, + project_metadata={ + "AB": { "User": "Alison Bell", + 
"Library type": "CellPlex scRNA-seq", + "Organism": "Human", + "PI": "Audrey Bower", + "Single cell platform": "10xGenomics Chromium 3'v3", + "Number of cells": None, + "Sequencer model": "MiSeq" } + }, + top_dir=self.dirn) + mockdir.create() + project = AnalysisProject(os.path.join(mockdir.dirn,'AB')) + self.assertEqual(get_multiplexed_samples(project), []) + + def test_get_multiplexed_samples_10x_multi_config(self): + """ + report: test 'get_multiplexed_samples' (use 10x multi config) + """ + mockdir = MockAnalysisDirFactory.bcl2fastq2( + '170901_M00879_0087_000000000-AGEW9', + 'miseq', + metadata={ "source": "testing", + "run_number": 87, + "sequencer_model": "MiSeq", + "analysis_number": 2 }, + project_metadata={ + "AB": { "User": "Alison Bell", + "Library type": "CellPlex scRNA-seq", + "Organism": "Human", + "PI": "Audrey Bower", + "Single cell platform": "10xGenomics Chromium 3'v3", + "Number of cells": None, + "Sequencer model": "MiSeq" } + }, + top_dir=self.dirn) + mockdir.create() + # Add a cellranger multi config.csv file + with open(os.path.join(mockdir.dirn, + "AB", + "10x_multi_config.csv"),'wt') as fp: + fastq_dir = os.path.join(mockdir.dirn, + "AB", + "fastqs") + fp.write("""[gene-expression] +reference,/data/refdata-cellranger-gex-GRCh38-2020-A + +[libraries] +fastq_id,fastqs,lanes,physical_library_id,feature_types,subsample_rate +AB1,%s,any,AB1,gene expression, +AB2,%s,any,AB2,Multiplexing Capture, + +[samples] +sample_id,cmo_ids,description +ABM1,CMO301,ABM1 +ABM2,CMO302,ABM2 +ABM3,CMO303,ABM3 +ABM4,CMO304,ABM4 +""" % (fastq_dir,fastq_dir)) + project = AnalysisProject(os.path.join(mockdir.dirn,'AB')) + self.assertEqual(get_multiplexed_samples(project), + ["ABM1", "ABM2", "ABM3", "ABM4"]) From d1c3354372eb812a9b147d65169812db716df86e Mon Sep 17 00:00:00 2001 From: Peter Briggs Date: Mon, 4 Nov 2024 12:39:03 +0000 Subject: [PATCH 3/4] 'report' command: refactor to use 'get_multiplexed_samples' for reporting multiplexed samples. 
--- auto_process_ngs/commands/report_cmd.py | 98 ++++++++++--------------- 1 file changed, 38 insertions(+), 60 deletions(-) diff --git a/auto_process_ngs/commands/report_cmd.py b/auto_process_ngs/commands/report_cmd.py index 60907a4ec..a854b23c6 100644 --- a/auto_process_ngs/commands/report_cmd.py +++ b/auto_process_ngs/commands/report_cmd.py @@ -164,25 +164,23 @@ def report_info(ap): (bcf_utils.pretty_print_names( info.multiplexed_samples.split(',')), project.prettyPrintSamples()) - elif project.info.library_type in ("CellPlex", - "CellPlex scRNA-seq", - "Flex"): - # Fetch implicit multiplexed sample info from config - try: - multi_config = CellrangerMultiConfigCsv( - os.path.join(project.dirn,"10x_multi_config.csv")) - number_of_samples = "%s multiplexed (%s physical)" % \ - (len(multi_config.sample_names), - len(project.samples)) - sample_names = "%s (%s)" % \ - (multi_config.pretty_print_samples(), - project.prettyPrintSamples()) - except FileNotFoundError: - number_of_samples = "%s (physical)" % len(project.samples) - sample_names = project.prettyPrintSamples() else: - number_of_samples = len(project.samples) - sample_names = project.prettyPrintSamples() + multiplexed_samples = get_multiplexed_samples(project) + if multiplexed_samples is not None: + if multiplexed_samples: + number_of_samples = "%s multiplexed (%s physical)" % \ + (len(multiplexed_samples), + len(project.samples)) + sample_names = "%s (%s)" % \ + (bcf_utils.pretty_print_names( + multiplexed_samples), + project.prettyPrintSamples()) + else: + number_of_samples = "%s (physical)" % len(project.samples) + sample_names = project.prettyPrintSamples() + else: + number_of_samples = len(project.samples) + sample_names = project.prettyPrintSamples() report.append("\n- %s" % project.name) report.append(" %s" % ('-'*len(project.name),)) report.append(" User : %s" % info.user) @@ -233,21 +231,15 @@ def report_concise(ap): number_of_samples = len(p.info.multiplexed_samples.\ split(',')) 
has_multiplexed_samples = True - elif p.info.library_type in ("CellPlex", - "CellPlex scRNA-seq", - "Flex"): - # Fetch implicit multiplexed sample info from config - try: - multi_config = CellrangerMultiConfigCsv( - os.path.join(p.dirn, - "10x_multi_config.csv")) - number_of_samples = len(multi_config.sample_names) + else: + multiplexed_samples = get_multiplexed_samples(p) + if multiplexed_samples: + # Implicit multiplexed sample info from config + number_of_samples = len(multiplexed_samples) has_multiplexed_samples = True - except FileNotFoundError: + else: + # Fall back to physical samples number_of_samples = len(p.samples) - else: - # Physical samples - number_of_samples = len(p.samples) if has_multiplexed_samples: samples = "%s multiplexed sample%s" % \ ('?' if number_of_samples == 0 @@ -466,21 +458,15 @@ def report_summary(ap): number_of_samples = len(project.info.multiplexed_samples.\ split(',')) has_multiplexed_samples = True - elif project.info.library_type in ("CellPlex", - "CellPlex scRNA-seq", - "Flex"): - # Fetch implicit multiplexed sample info from config - try: - multi_config = CellrangerMultiConfigCsv( - os.path.join(project.dirn, - "10x_multi_config.csv")) - number_of_samples = len(multi_config.sample_names) + else: + multiplexed_samples = get_multiplexed_samples(project) + if multiplexed_samples: + # Implicit multiplexed sample info from config + number_of_samples = len(multiplexed_samples) has_multiplexed_samples = True - except FileNotFoundError: + else: + # Physical samples number_of_samples = len(project.samples) - else: - # Physical samples - number_of_samples = len(project.samples) if has_multiplexed_samples: samples = "%s multiplexed sample%s" % \ ('?' 
if number_of_samples == 0 @@ -649,16 +635,8 @@ def fetch_value(ap,project,field): info = project.info except AttributeError: info = None - # 10x CellPlex and Flex data - multi_config = None - if project.info.library_type in ("CellPlex", - "CellPlex scRNA-seq", - "Flex"): - try: - multi_config = CellrangerMultiConfigCsv( - os.path.join(project.dirn,"10x_multi_config.csv")) - except FileNotFoundError: - pass + # Implicit multiplexed samples + multiplexed_samples = get_multiplexed_samples(project) # Generate value for supplied field name if field == 'datestamp': return IlluminaData.split_run_name(ap.run_name)[0] @@ -707,9 +685,9 @@ def fetch_value(ap,project,field): return '?' else: return str(len(info.multiplexed_samples.split(','))) - elif multi_config: - # Number of multiplexed samples from 10x multi config - return str(len(multi_config.sample_names)) + elif multiplexed_samples: + # Implicit number of multiplexed samples + return str(len(multiplexed_samples)) else: # Number of "physical" samples return str(len(project.samples)) @@ -726,9 +704,9 @@ def fetch_value(ap,project,field): else: return bcf_utils.pretty_print_names( info.multiplexed_samples.split(',')) - elif multi_config: - # Names of multiplexed samples from 10x multi config - return multi_config.pretty_print_samples() + elif multiplexed_samples: + # Names of implicit multiplexed samples + return bcf_utils.pretty_print_names(multiplexed_samples) else: # Names of "physical" samples return project.prettyPrintSamples() From 15d58d5ba412278822e1dffe97a6dc399c7c8d0a Mon Sep 17 00:00:00 2001 From: Peter Briggs Date: Mon, 4 Nov 2024 13:04:24 +0000 Subject: [PATCH 4/4] 'report' command: update reporting of multiplexed samples to handle multiple configs. 
--- auto_process_ngs/commands/report_cmd.py | 20 +++--- .../test/commands/test_report_cmd.py | 65 +++++++++++++++++++ 2 files changed, 75 insertions(+), 10 deletions(-) diff --git a/auto_process_ngs/commands/report_cmd.py b/auto_process_ngs/commands/report_cmd.py index a854b23c6..a224a869b 100644 --- a/auto_process_ngs/commands/report_cmd.py +++ b/auto_process_ngs/commands/report_cmd.py @@ -741,16 +741,16 @@ def get_multiplexed_samples(project): if project.info.library_type in ("CellPlex", "CellPlex scRNA-seq", "Flex"): - # Fetch implicit multiplexed sample info from config - try: - multi_config = CellrangerMultiConfigCsv( - os.path.join(project.dirn,"10x_multi_config.csv")) - # Return multiplexed sample names - return multi_config.sample_names - except FileNotFoundError: - # Multiplexed samples expected but can't be identified - # Return empty list - return [] + # Fetch implicit multiplexed sample info from configs + multiplexed_samples = [] + for f in os.listdir(project.dirn): + if f.startswith("10x_multi_config.") and \ + f.endswith(".csv"): + multi_config = CellrangerMultiConfigCsv( + os.path.join(project.dirn, f)) + multiplexed_samples.extend(multi_config.sample_names) + # Return multiplexed sample names + return sorted(multiplexed_samples) else: # No multiplexed samples expected # Return None diff --git a/auto_process_ngs/test/commands/test_report_cmd.py b/auto_process_ngs/test/commands/test_report_cmd.py index b5ef86b7e..7b0299816 100644 --- a/auto_process_ngs/test/commands/test_report_cmd.py +++ b/auto_process_ngs/test/commands/test_report_cmd.py @@ -2508,6 +2508,71 @@ def test_get_multiplexed_samples_10x_multi_config(self): ABM2,CMO302,ABM2 ABM3,CMO303,ABM3 ABM4,CMO304,ABM4 +""" % (fastq_dir,fastq_dir)) + project = AnalysisProject(os.path.join(mockdir.dirn,'AB')) + self.assertEqual(get_multiplexed_samples(project), + ["ABM1", "ABM2", "ABM3", "ABM4"]) + + def test_get_multiplexed_samples_multiple_10x_multi_configs(self): + """ + report: test 
'get_multiplexed_samples' (multiple 10x multi configs) + """ + mockdir = MockAnalysisDirFactory.bcl2fastq2( + '170901_M00879_0087_000000000-AGEW9', + 'miseq', + metadata={ "source": "testing", + "run_number": 87, + "sequencer_model": "MiSeq", + "analysis_number": 2 }, + project_metadata={ + "AB": { "User": "Alison Bell", + "Library type": "CellPlex scRNA-seq", + "Organism": "Human", + "PI": "Audrey Bower", + "Single cell platform": "10xGenomics Chromium 3'v3", + "Number of cells": None, + "Sequencer model": "MiSeq" } + }, + top_dir=self.dirn) + mockdir.create() + # Add cellranger multi config.csv files + with open(os.path.join(mockdir.dirn, + "AB", + "10x_multi_config.AB1.csv"),'wt') as fp: + fastq_dir = os.path.join(mockdir.dirn, + "AB", + "fastqs") + fp.write("""[gene-expression] +reference,/data/refdata-cellranger-gex-GRCh38-2020-A + +[libraries] +fastq_id,fastqs,lanes,physical_library_id,feature_types,subsample_rate +AB1,%s,any,AB1,gene expression, +AB1,%s,any,AB1,Multiplexing Capture, + +[samples] +sample_id,cmo_ids,description +ABM1,CMO301,ABM1 +ABM2,CMO302,ABM2 +""" % (fastq_dir,fastq_dir)) + with open(os.path.join(mockdir.dirn, + "AB", + "10x_multi_config.AB2.csv"),'wt') as fp: + fastq_dir = os.path.join(mockdir.dirn, + "AB", + "fastqs") + fp.write("""[gene-expression] +reference,/data/refdata-cellranger-gex-GRCh38-2020-A + +[libraries] +fastq_id,fastqs,lanes,physical_library_id,feature_types,subsample_rate +AB2,%s,any,AB2,gene expression, +AB2,%s,any,AB2,Multiplexing Capture, + +[samples] +sample_id,cmo_ids,description +ABM3,CMO303,ABM3 +ABM4,CMO304,ABM4 """ % (fastq_dir,fastq_dir)) project = AnalysisProject(os.path.join(mockdir.dirn,'AB')) self.assertEqual(get_multiplexed_samples(project),