Skip to content

Commit

Permalink
feature(io): Load .gtf files and tests
Browse files Browse the repository at this point in the history
Loads a `.gtf` file and returns it as an iterator (as used in `all_introns_counts_and_info.py`).
  • Loading branch information
ns-rse committed Nov 28, 2024
1 parent 60bc830 commit 2bad838
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 12 deletions.
43 changes: 40 additions & 3 deletions isoslam/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@ def _get_loader(file_ext: str = "bam") -> Callable: # type: ignore[type-arg]
return _load_bed
if file_ext == ".gtf":
return _load_gtf
if file_ext == ".tbi":
return _load_tbi
if file_ext == ".vcf":
return _load_vcf
if file_ext == ".vcf.gz":
Expand Down Expand Up @@ -237,9 +239,24 @@ def _load_bed() -> None:
return


def _load_gtf() -> None:
"""Load '.bed' file."""
return
def _load_gtf(gtf_file: str | Path) -> pysam.libctabix.tabix_generic_iterator:
    """
    Load '.gtf' file and return as an iterable.

    Parameters
    ----------
    gtf_file : str | Path
        Path, as string or pathlib Path, to a '.gtf' file that is to be loaded.

    Returns
    -------
    pysam.libctabix.tabix_generic_iterator
        Iterator of GTF file.

    Raises
    ------
    FileNotFoundError
        If ``gtf_file`` does not exist; the error from opening the file propagates unchanged.
    """
    # The handle is handed to the iterator and therefore intentionally not closed here; it has to
    # stay open for as long as the caller keeps consuming the returned iterator.
    gtf_handle = Path(gtf_file).open(encoding="utf8")
    return pysam.tabix_iterator(gtf_handle, parser=pysam.asGTF())


def _load_vcf(vcf_file: str | Path) -> pysam.libcbcf.VariantFile:
Expand All @@ -260,3 +277,23 @@ def _load_vcf(vcf_file: str | Path) -> pysam.libcbcf.VariantFile:
return pysam.VariantFile(vcf_file)
except FileNotFoundError as e:
raise e


def _load_tbi(tbi_file: str | Path) -> pysam.libcbcf.VariantFile:
    """
    Load '.tbi' file.

    Parameters
    ----------
    tbi_file : str | Path
        Path, as string or pathlib Path, to a '.tbi' file that is to be loaded.

    Returns
    -------
    pysam.libcbcf.VariantFile
        Loads the specified TBI file.

    Raises
    ------
    FileNotFoundError
        Propagated unchanged from ``pysam.VariantFile`` when it is raised (along with any other error it raises).
    """
    # NOTE(review): the path is opened with ``pysam.VariantFile``; whether that is the right reader for a
    # '.tbi' file should be confirmed, as the accompanying test is marked xfail ("File not in correct format.").
    return pysam.VariantFile(tbi_file)
47 changes: 38 additions & 9 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

from isoslam import io

# pylint: disable=protected-access

BASE_DIR = Path.cwd()
RESOURCES = BASE_DIR / "tests" / "resources"

Expand Down Expand Up @@ -65,8 +67,8 @@ def test_read_yaml() -> None:
assert sample_config == CONFIG


def test_create_config(tmp_path: Path) -> None:
"""Test creation of configuration file from default."""
# def test_create_config(tmp_path: Path) -> None:
# """Test creation of configuration file from default."""


@pytest.mark.parametrize(
Expand Down Expand Up @@ -177,24 +179,47 @@ def test_load_bam(
)
def test_load_bed(file_path: str | Path, object_type: str) -> None:
"""Test loading of bed file."""
bed_file = io._load_bam()
bed_file = io._load_bam(file_path)
assert isinstance(bed_file, object_type)


@pytest.mark.skip
@pytest.mark.parametrize(
("file_path", "object_type"),
[
pytest.param(RESOURCES / "gtf" / "test_wash1.gtf", str, id="gtf file as Path"),
pytest.param("tests/resources/gtf/test_wash1.gtf", str, id="gtf file as str"),
pytest.param(
RESOURCES / "gtf" / "test_wash1.gtf", pysam.libctabix.tabix_generic_iterator, id="gtf file as Path"
),
pytest.param(
"tests/resources/gtf/test_wash1.gtf", pysam.libctabix.tabix_generic_iterator, id="gtf file as str"
),
],
)
def test_load_gtf(file_path: str | Path, object_type: str) -> None:
"""Test loading of gtf file."""
gtf_file = io._load_bed()
gtf_file = io._load_gtf(file_path)
assert isinstance(gtf_file, object_type)


@pytest.mark.xfail(reason="File not in correct format.")
@pytest.mark.parametrize(
    ("file_path", "object_type", "compression", "is_remote"),
    [
        pytest.param(
            RESOURCES / "vcf" / "d0.vcf.gz.tbi", pysam.libcbcf.VariantFile, "BGZF", False, id="d0 tbi as Path"
        ),
        pytest.param("tests/resources/vcf/d0.vcf.gz.tbi", pysam.libcbcf.VariantFile, "BGZF", False, id="d0 tbi as str"),
    ],
)
def test_load_tbi(
    file_path: str | Path, object_type: pysam.libcbcf.VariantFile, compression: str, is_remote: bool
) -> None:
    """
    Check that a '.tbi' file given as either a Path or a string is loaded by ``io._load_tbi``.

    The loaded object must be an instance of ``object_type`` and report the expected ``compression`` and
    ``is_remote`` attributes. Currently marked as an expected failure.
    """
    loaded = io._load_tbi(file_path)
    # Type first, then the two attributes the parametrisation pins down.
    assert isinstance(loaded, object_type)
    assert loaded.compression == compression
    assert loaded.is_remote == is_remote


@pytest.mark.parametrize(
("file_path", "object_type", "compression", "is_remote"),
[
Expand Down Expand Up @@ -266,8 +291,12 @@ def test_get_loader_value_error(file_ext: str) -> None:
),
# pytest.param(RESOURCES / "bed" / "test_coding_introns.bed", id="bed file as Path"),
# pytest.param("tests/resources/bed/test_coding_introns.bed", id="bed file as str"),
# pytest.param(RESOURCES / "gtf" / "test_wash1.gtf", id="gtf file as Path"),
# pytest.param("tests/resources/gtf/test_wash1.gtf", id="gtf file as str"),
pytest.param(
RESOURCES / "gtf" / "test_wash1.gtf", pysam.libctabix.tabix_generic_iterator, id="gtf file as Path"
),
pytest.param(
"tests/resources/gtf/test_wash1.gtf", pysam.libctabix.tabix_generic_iterator, id="gtf file as str"
),
pytest.param(RESOURCES / "vcf" / "d0.vcf.gz", pysam.libcbcf.VariantFile, id="d0 as Path"),
pytest.param("tests/resources/vcf/d0.vcf.gz", pysam.libcbcf.VariantFile, id="d0 as str"),
],
Expand Down

0 comments on commit 2bad838

Please sign in to comment.