-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feature(isoslam): Adds functions to extract features from reads
Closes #88 Adds functions to extract features from a read, both from an individual `pysam.AlignedSegment` and from pairs of reads returned from the `pysam.fetch()` iterator. - Includes tests. - Extracts - `start` - `end` - `length` - tag for `XS` if present, otherwise `None` - tag for `XT` if present, otherwise `None` - zips up the blocks and returns them as `block_start` and `block_end` Tests use fixtures defined in `tests/conftest.py` and I had trouble trying to mock `pysam.AlignedSegment` as a number of the attributes I wanted to mock were immutable. I therefore opted to load a `.bam` file and extract some individual `AlignedSegments` as fixtures and use these in the parameterised tests. However, confusingly the value I used for `start` in the `fetch()` call did not match the first item returned by the generator (this can be seen by the disconnect between the `start` parameters in the `fetch()` calls and the names of the fixtures which reflect the values that are tested and used in the parameter names). I'm not sure why this is, but one consequence of it is that I have yet to extract any `AlignedSegment` where the `get_tag("XS") == Aligned` and so in turn these are all `None`; similarly there are no `get_tag("XT")` (the transcript id?). Guidance on how to identify and extract these would be very welcome (I just stuck some `print()` statements into the section of `all_introns_counts_and_info.py` where the `get_tag("XT")` calls were made to show the `read1_start` at that point, very crude as I'm still fumbling my way around these files and their specification!).
- Loading branch information
Showing
3 changed files
with
241 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
"""Fixtures for pytest.""" | ||
|
||
from pathlib import Path | ||
|
||
import pytest | ||
from pysam import AlignedSegment, AlignmentFile | ||
|
||
from isoslam import io | ||
|
||
# Anchor all resource paths to this file's location rather than the current
# working directory, so fixtures resolve regardless of where pytest is invoked.
# NOTE(review): assumes this file lives at ``<repo>/tests/conftest.py`` — confirm.
BASE_DIR = Path(__file__).resolve().parents[1]
TEST_DIR = BASE_DIR / "tests"
RESOURCES = TEST_DIR / "resources"
# One sub-directory of test resources per file format.
GTF_DIR = RESOURCES / "gtf"
BED_DIR = RESOURCES / "bed"
VCF_DIR = RESOURCES / "vcf"
BAM_DIR = RESOURCES / "bam"
|
||
|
||
# pylint: disable=redefined-outer-name | ||
|
||
|
||
@pytest.fixture()
def bam_file() -> AlignmentFile:
    """Load the sorted, remapped test ``.bam`` file from the BAM resources directory."""
    # NOTE(review): the returned handle is not closed by this fixture.
    bam_path = BAM_DIR / "d0_no4sU_filtered_remapped_sorted.bam"
    return io.load_file(bam_path)
|
||
|
||
@pytest.fixture()
def aligned_segment_28584(bam_file: AlignmentFile) -> AlignedSegment:
    """Return the first AlignedSegment yielded when fetching chr9:28592-28593 from ``bam_file``."""
    segments = bam_file.fetch(contig="chr9", start=28592, end=28593)
    return next(segments)
|
||
|
||
@pytest.fixture()
def aligned_segment_17416(bam_file: AlignmentFile) -> AlignedSegment:
    """Return the first AlignedSegment yielded when fetching chr9:17804-18126 from ``bam_file``."""
    # @ns-rse : the segment returned does not start at the requested ``start``.
    # NOTE(review): presumably ``fetch()`` yields every read *overlapping* the region, so the
    # first one may begin before ``start`` — confirm against the pysam documentation.
    segments = bam_file.fetch(contig="chr9", start=17804, end=18126)
    return next(segments)
|
||
|
||
@pytest.fixture()
def aligned_segment_18029(bam_file: AlignmentFile) -> AlignedSegment:
    """Return the first AlignedSegment yielded when fetching chr9:18156-24870 from ``bam_file``."""
    # @ns-rse : the segment returned does not start at the requested ``start``.
    # NOTE(review): presumably ``fetch()`` yields every read *overlapping* the region, so the
    # first one may begin before ``start`` — confirm against the pysam documentation.
    segments = bam_file.fetch(contig="chr9", start=18156, end=24870)
    return next(segments)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters