From 5e048151d2314d8e3234cc46f73c7522d3bcb924 Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Wed, 15 May 2019 08:54:58 -0700 Subject: [PATCH] AttachBarcodes generic entrypoint -- fix bugs & add test (#66) Fixes a few plumbing issues in the generic AttachBarcodes entry point (which allows command-line specification of the in-read positions of all barcodes). The code path with all three of (i) sample barcode in I1 fastq, (ii) cell barcode whitelist, and (iii) cell/UMI barcodes in R1 fastq as usual, had some typing issues; fixes these and tests them. --- src/sctools/platform.py | 7 ++-- src/sctools/test/test_entrypoints.py | 55 ++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/src/sctools/platform.py b/src/sctools/platform.py index 22da63c..272b693 100644 --- a/src/sctools/platform.py +++ b/src/sctools/platform.py @@ -935,15 +935,16 @@ def _make_tag_generators( barcode_args = {'fastq_files': r1} if i1: - barcode_args['embedded_barcodes'] = [cls.sample_barcode] - tag_generators.append(fastq.EmbeddedBarcodeGenerator(**barcode_args)) + sample_barcode_args = dict(barcode_args) + sample_barcode_args['embedded_barcodes'] = [cls.sample_barcode] + tag_generators.append(fastq.EmbeddedBarcodeGenerator(**sample_barcode_args)) if whitelist: barcode_args['whitelist'] = whitelist if cls.cell_barcode: barcode_args['embedded_cell_barcode'] = cls.cell_barcode if cls.molecule_barcode: - barcode_args['other_embedded_barcodes'] = cls.molecule_barcode + barcode_args['other_embedded_barcodes'] = [cls.molecule_barcode] tag_generators.append( fastq.BarcodeGeneratorWithCorrectedCellBarcodes(**barcode_args) ) diff --git a/src/sctools/test/test_entrypoints.py b/src/sctools/test/test_entrypoints.py index 44f114e..cb646a4 100644 --- a/src/sctools/test/test_entrypoints.py +++ b/src/sctools/test/test_entrypoints.py @@ -86,6 +86,61 @@ def test_Attach10XBarcodes_entrypoint_with_whitelist(): os.remove('test_tagged_bam.bam') # clean up +def test_AttachBarcodes_entrypoint_with_whitelist(): + # test of the BarcodePlatform.attach_barcodes entry point with + # sample, cell, and molecule barcodes all specified + args = [ + '--r1', + data_dir + 'test_r1.fastq', + '--i1', + data_dir + 'test_i7.fastq', + '--u2', + data_dir + 'test.bam', + '--output-bamfile', + 'test_tagged_bam.bam', + '--whitelist', + data_dir + '1k-august-2016.txt', + "--sample-barcode-start-position", + "0", + "--sample-barcode-length", + "8", + "--cell-barcode-start-position", + "0", + "--cell-barcode-length", + "16", + "--molecule-barcode-start-position", + "16", + "--molecule-barcode-length", + "7", # changed 10>7 intentionally for test + ] + + return_call = platform.BarcodePlatform.attach_barcodes(args) + assert return_call == 0 + success = False + with pysam.AlignmentFile('test_tagged_bam.bam', 'rb', check_sq=False) as f: + for alignment in f: + if alignment.has_tag(consts.CELL_BARCODE_TAG_KEY): + success = True + # each alignment should now have a tag, and that tag should be a string + assert isinstance(alignment.get_tag(consts.RAW_CELL_BARCODE_TAG_KEY), str) + assert isinstance( + alignment.get_tag(consts.QUALITY_CELL_BARCODE_TAG_KEY), str + ) + assert isinstance( + alignment.get_tag(consts.RAW_MOLECULE_BARCODE_TAG_KEY), str + ) + assert len(alignment.get_tag(consts.RAW_MOLECULE_BARCODE_TAG_KEY)) == 7 + assert isinstance( + alignment.get_tag(consts.QUALITY_MOLECULE_BARCODE_TAG_KEY), str + ) + assert isinstance(alignment.get_tag(consts.RAW_SAMPLE_BARCODE_TAG_KEY), str) + assert isinstance( + alignment.get_tag(consts.QUALITY_SAMPLE_BARCODE_TAG_KEY), str + ) + assert success + os.remove('test_tagged_bam.bam') # clean up + + def test_split_bam(): tag_args = [ '--r1',