From f6204a1f33b8bbbcf4b790d4d7cb0b7100bf71a3 Mon Sep 17 00:00:00 2001 From: zhuchcn Date: Thu, 6 Jun 2024 10:43:21 +0800 Subject: [PATCH 1/3] add support for unmmapped bams --- CHANGELOG.md | 4 ++-- pipeval/validate/__main__.py | 4 +++- pipeval/validate/validate.py | 1 + pipeval/validate/validate_types.py | 2 +- pipeval/validate/validators/bam.py | 9 +++++--- test/unit/test_validate.py | 34 +++++++++++++++++++++++++----- 6 files changed, 42 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6039e50..c80fb43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [Unreleased] ### Added - Add validation flowchart - +- Add support for unmapped BAM --- ## [5.0.0] - 2024-02-16 @@ -72,7 +72,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Fixed -### Removed +### Removed - Remove deprecated parameter options from README - Remove inaccessible design doc link from README - Remove directory checking diff --git a/pipeval/validate/__main__.py b/pipeval/validate/__main__.py index 11f67b8..24b6df7 100644 --- a/pipeval/validate/__main__.py +++ b/pipeval/validate/__main__.py @@ -16,7 +16,7 @@ def positive_integer(arg): def add_subparser_validate(subparsers:argparse._SubParsersAction): """ Parse arguments """ - parser = subparsers.add_parser( + parser:argparse.ArgumentParser = subparsers.add_parser( name = 'validate', help = 'Validate one or more file(s)', description = 'Validate one or more file(s)', @@ -26,6 +26,8 @@ def add_subparser_validate(subparsers:argparse._SubParsersAction): parser.add_argument('path', help='One or more paths of files to validate', type=str, nargs='+') parser.add_argument('-r', '--cram-reference', default=None, \ help='Path to reference file for CRAM') + parser.add_argument('-u', '--unmapped-bam', action='store_true', + help='Input is unmmapped BAM.') parser.add_argument('-p', '--processes', type=positive_integer, default=1, \ help='Number of processes to run in parallel when validating multiple files') parser.add_argument('-t', '--test-integrity', action='store_true', \ diff --git a/pipeval/validate/validate.py b/pipeval/validate/validate.py index a54c1ad..52ffbbe 100644 --- a/pipeval/validate/validate.py +++ b/pipeval/validate/validate.py @@ -47,6 +47,7 @@ def _validate_file( `args` must contain the following: `path` is a required argument with a value of list of files `cram_reference` is a required argument with either a string value or None + `unmapped_bam` is a required argument of boolean variable ''' _path_exists(path) diff --git a/pipeval/validate/validate_types.py b/pipeval/validate/validate_types.py index 1c91e1d..2773fea 100644 --- a/pipeval/validate/validate_types.py +++ b/pipeval/validate/validate_types.py @@ -3,5 +3,5 @@ ValidateArgs = namedtuple( 'args', - 'path, cram_reference, processes, test_integrity' + 'path, cram_reference, unmapped_bam, processes, test_integrity' ) diff --git a/pipeval/validate/validators/bam.py b/pipeval/validate/validators/bam.py index 52749aa..4702765 100644 --- a/pipeval/validate/validators/bam.py +++ b/pipeval/validate/validators/bam.py @@ -6,10 +6,13 @@ from pipeval.validate.validate_types import ValidateArgs -def _validate_bam_file(path:Path): +def _validate_bam_file(path:Path, unmapped_bam:bool): '''Validates bam file''' + args = [str(path)] + if unmapped_bam: + args.append('-u') try: - pysam.quickcheck(str(path)) + pysam.quickcheck(*args) except pysam.SamtoolsError as err: raise ValueError("samtools bam check failed. " + str(err)) from err @@ -35,5 +38,5 @@ def _check_bam(path:Path, args:Union[ValidateArgs,Dict[str, Union[str,list]]]): `args` must contains the following: `cram_reference` is a required key with either a string value or None ''' - _validate_bam_file(path) + _validate_bam_file(path, args.unmapped_bam) _check_bam_index(path) diff --git a/test/unit/test_validate.py b/test/unit/test_validate.py index 86a6128..5c620ea 100644 --- a/test/unit/test_validate.py +++ b/test/unit/test_validate.py @@ -110,7 +110,13 @@ def test__path_exists__errors_for_non_existing_path(mock_path): @mock.patch('pipeval.validate.files.Path', autospec=True) def test__check_compressed__raises_warning_for_uncompressed_path(mock_path, mock_magic): mock_magic.return_value = 'text/plain' - test_args = ValidateArgs(path=[], cram_reference=None, processes=1, test_integrity=False) + test_args = ValidateArgs( + path=[], + cram_reference=None, + unmapped_bam=False, + processes=1, + test_integrity=False + ) with pytest.warns(UserWarning): _check_compressed(mock_path, test_args) @@ -132,7 +138,13 @@ def test__check_compressed__passes_compression_check( compression_mime): mock_magic.return_value = compression_mime mock_integrity.return_value = None - test_args = ValidateArgs(path=[], cram_reference=None, processes=1, test_integrity=False) + test_args = ValidateArgs( + path=[], + cram_reference=None, + unmapped_bam=False, + processes=1, + test_integrity=False + ) with warnings.catch_warnings(): warnings.filterwarnings("error") @@ -147,14 +159,14 @@ def test__validate_bam_file__empty_bam_file(mock_pysam): test_path = Path('empty/valid/bam') with pytest.raises(ValueError): - _validate_bam_file(test_path) + _validate_bam_file(test_path, unmapped_bam=False) @mock.patch('pipeval.validate.validators.bam.Path', autospec=True) def test__validate_bam_file__quickcheck_fails(mock_path): mock_path.exists.return_value = False with pytest.raises(ValueError): - _validate_bam_file(mock_path) + _validate_bam_file(mock_path, unmapped_bam=False) @mock.patch('pipeval.validate.validators.bam.pysam', autospec=True) def test__check_bam_index__no_index_file_error(mock_pysam): @@ -247,7 +259,13 @@ def test__validate_vcf_file__passes_vcf_validation(mock_call): _validate_vcf_file('some/file') def test__run_validate__passes_validation_no_files(): - test_args = ValidateArgs(path=[], cram_reference=None, processes=1, test_integrity=False) + test_args = ValidateArgs( + path=[], + cram_reference=None, + unmapped_bam=False, + processes=1, + test_integrity=False + ) run_validate(test_args) @pytest.mark.parametrize( @@ -273,6 +291,7 @@ def test___validation_worker__fails_with_failing_checks( test_args = ValidateArgs( path=[test_path], cram_reference=None, + unmapped_bam=False, processes=1, test_integrity=False) mock_path_resolve.return_value = test_path @@ -292,6 +311,7 @@ def test__run_validate__passes_on_all_valid_files( test_args = ValidateArgs( path=[test_path], cram_reference=None, + unmapped_bam=False, processes=1, test_integrity=False) @@ -309,6 +329,7 @@ def test__run_validate__fails_with_failing_file( test_args = ValidateArgs( path=[test_path], cram_reference=None, + unmapped_bam=False, processes=1, test_integrity=False) expected_code = 1 @@ -352,6 +373,7 @@ def test__validate_file__checks_compression( test_args = ValidateArgs( path=[], cram_reference=None, + unmapped_bam=False, processes=1, test_integrity=False) @@ -369,6 +391,7 @@ def test__run_validate__fails_on_unresolvable_symlink(mock_path_resolve): test_args = ValidateArgs( path=[test_path], cram_reference=None, + unmapped_bam=False, processes=1, test_integrity=False) @@ -394,6 +417,7 @@ def test___validation_worker__passes_proper_validation( test_args = ValidateArgs( path=[test_path], cram_reference=None, + unmapped_bam=False, processes=1, test_integrity=False) From b7539ead73440af76cb55110d534a645744e4086 Mon Sep 17 00:00:00 2001 From: zhuchcn Date: Thu, 6 Jun 2024 13:23:56 +0800 Subject: [PATCH 2/3] fix _validate_bam_file for ubam --- pipeval/validate/validators/bam.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pipeval/validate/validators/bam.py b/pipeval/validate/validators/bam.py index 4702765..a14aca2 100644 --- a/pipeval/validate/validators/bam.py +++ b/pipeval/validate/validators/bam.py @@ -16,7 +16,12 @@ def _validate_bam_file(path:Path, unmapped_bam:bool): except pysam.SamtoolsError as err: raise ValueError("samtools bam check failed. " + str(err)) from err - bam_head: pysam.IteratorRowHead = pysam.AlignmentFile(str(path)).head(1) + kwargs = { + 'filename': str(path) + } + if unmapped_bam: + kwargs['check_sq'] = False + bam_head: pysam.IteratorRowHead = pysam.AlignmentFile(**kwargs).head(1) if next(bam_head, None) is None: raise ValueError("pysam bam check failed. No reads in " + str(path)) From 7bc3e64ea449df90188b2afed0ec585880af5eaf Mon Sep 17 00:00:00 2001 From: Yash Patel Date: Sun, 23 Jun 2024 20:52:44 -0700 Subject: [PATCH 3/3] Add tests to make sure proper calls made with unmapped option --- test/unit/test_validate.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/test/unit/test_validate.py b/test/unit/test_validate.py index 5c620ea..818843c 100644 --- a/test/unit/test_validate.py +++ b/test/unit/test_validate.py @@ -2,7 +2,7 @@ # pylint: disable=C0114 from pathlib import Path from argparse import Namespace, ArgumentTypeError -from unittest.mock import Mock, mock_open +from unittest.mock import Mock, mock_open, MagicMock import warnings import zlib import gzip @@ -161,6 +161,38 @@ def test__validate_bam_file__empty_bam_file(mock_pysam): with pytest.raises(ValueError): _validate_bam_file(test_path, unmapped_bam=False) +@mock.patch('pipeval.validate.validators.bam.pysam') +def test__validate_bam_file__quickcheck_called_with_unmapped(mock_pysam): + mock_alignment_file = Mock() + mock_alignment_file.head.return_value = iter(['read1']) + mock_quickcheck = Mock() + + mock_pysam.quickcheck = mock_quickcheck + mock_pysam.AlignmentFile.return_value = mock_alignment_file + + test_path = 'empty/valid/bam' + test_unmapped_option = '-u' + + _validate_bam_file(test_path, unmapped_bam=True) + mock_quickcheck.assert_called_with(test_path, test_unmapped_option) + +@mock.patch('pipeval.validate.validators.bam.pysam') +def test__validate_bam_file__alignmentfile_called_with_unmapped(mock_pysam): + mock_alignment_file = MagicMock() + mock_alignment_file.__iter__.return_value = ['read1'] + mock_quickcheck = Mock() + + mock_pysam.quickcheck = mock_quickcheck + mock_pysam.AlignmentFile = mock_alignment_file + + test_path = 'empty/valid/bam' + test_unmapped_option = False + + _validate_bam_file(test_path, unmapped_bam=True) + mock_alignment_file.assert_called_with( + **{'filename': test_path, 'check_sq': test_unmapped_option} + ) + @mock.patch('pipeval.validate.validators.bam.Path', autospec=True) def test__validate_bam_file__quickcheck_fails(mock_path): mock_path.exists.return_value = False