diff --git a/isoslam/io.py b/isoslam/io.py
index d3d3674..c476c20 100644
--- a/isoslam/io.py
+++ b/isoslam/io.py
@@ -1,11 +1,12 @@
 """Module for reading files."""
 
 import argparse
+import gzip
 from collections.abc import Callable
 from datetime import datetime
 from importlib import resources
 from pathlib import Path
-from typing import Any
+from typing import Any, TextIO
 
 import pysam
 
@@ -201,15 +202,13 @@ def _get_loader(file_ext: str = "bam") -> Callable:  # type: ignore[type-arg]
     """
     if file_ext == ".bam":
         return _load_bam
-    if file_ext == ".bed":
+    if file_ext == ".bed" or file_ext == ".bed.gz":
         return _load_bed
     if file_ext == ".gtf":
         return _load_gtf
     if file_ext == ".tbi":
         return _load_tbi
-    if file_ext == ".vcf":
-        return _load_vcf
-    if file_ext == ".vcf.gz":
+    if file_ext == ".vcf" or file_ext == ".vcf.gz":
         return _load_vcf
     raise ValueError(file_ext)
 
@@ -234,9 +233,26 @@ def _load_bam(bam_file: str | Path) -> pysam.libcalignmentfile.AlignmentFile:
         raise e
 
 
-def _load_bed() -> None:
-    """Load '.bed' file."""
-    return
+def _load_bed(bed_file: str | Path) -> TextIO:
+    """
+    Open '.bed' file for reading, supports gzip compressed formats.
+
+    Parameters
+    ----------
+    bed_file : str | Path
+        Path, as string or pathlib Path, to a '.bed' or '.bed.gz' file that is to be loaded.
+
+    Returns
+    -------
+    TextIO
+        Returns a connection to an open file object.
+    """
+    try:
+        if Path(bed_file).suffix == ".gz":
+            return gzip.open(bed_file, "rt", encoding="utf-8")
+        return Path(bed_file).open(mode="r", encoding="utf-8")
+    except OSError as e:
+        raise e
 
 
 def _load_gtf(gtf_file: str | Path) -> pysam.libctabix.tabix_generic_iterator:
diff --git a/tests/test_io.py b/tests/test_io.py
index ec4f934..3e093b8 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -3,8 +3,9 @@
 import argparse
 from collections.abc import Callable
 from datetime import datetime
+from io import TextIOWrapper
 from pathlib import Path
-from typing import Any
+from typing import Any, TextIO
 
 import pysam
 import pytest
@@ -44,7 +45,7 @@ def test_str_to_path(tmp_path: Path) -> None:
 
 
 def test_path_to_str(tmp_path: Path) -> None:
-    """Test that Path objects are converted to strings."""
+    """Test that Path objects in dictionaries are converted to strings."""
     CONFIG_PATH = {
         "this": "is",
         "a": "test",
@@ -169,17 +170,16 @@ def test_load_bam(
     assert bam_file.compression == compression
 
 
-@pytest.mark.skip
 @pytest.mark.parametrize(
     ("file_path", "object_type"),
     [
-        pytest.param(RESOURCES / "bed" / "test_coding_introns.bed", str, id="bed file as Path"),
-        pytest.param("tests/resources/bed/test_coding_introns.bed", str, id="bed file as str"),
+        pytest.param(RESOURCES / "bed" / "test_coding_introns.bed", TextIOWrapper, id="bed file as Path"),
+        pytest.param("tests/resources/bed/test_coding_introns.bed", TextIOWrapper, id="bed file as str"),
     ],
 )
-def test_load_bed(file_path: str | Path, object_type: str) -> None:
+def test_load_bed(file_path: str | Path, object_type: type[TextIO]) -> None:
     """Test loading of bed file."""
-    bed_file = io._load_bam(file_path)
-    assert isinstance(bed_file, object_type)
+    with io._load_bed(file_path) as bed_file:
+        assert isinstance(bed_file, object_type)
 
 