Skip to content

Commit

Permalink
Merge pull request #97 from sudlab/ns-rse/86-bed-dictionary
Browse files Browse the repository at this point in the history
  • Loading branch information
ns-rse authored Nov 29, 2024
2 parents edd7c28 + 0737c50 commit e2f51dc
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 0 deletions.
37 changes: 37 additions & 0 deletions isoslam/isoslam.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""IsoSLAM module."""

from collections import defaultdict
from pathlib import Path
from typing import Any

from isoslam import io


def extract_transcripts(bed_file: str | Path) -> dict[Any, list[tuple[Any, int, int, Any, Any]]]:
    """
    Extract features from `.bed` file and return as a dictionary indexed by transcript_id.

    Blank lines and BED header lines (``#`` comments, ``track`` and ``browser`` lines) carry no feature and are
    skipped rather than being parsed as records.

    Parameters
    ----------
    bed_file : str | Path
        Path, as string or pathlib Path, to a `.bed` file.

    Returns
    -------
    dict[Any, list[tuple[Any, int, int, Any, Any]]]
        Dictionary indexed by transcript_id (the fourth column with any ``_intron`` text removed). Each value is a
        list of ``(chromosome, start, end, transcript_id, strand)`` tuples in the order they appear in the file.
        The object returned is a ``defaultdict``, so looking up a transcript_id that is absent yields (and stores)
        an empty list.
    """
    coordinates: defaultdict[Any, list[tuple[Any, int, int, Any, Any]]] = defaultdict(list)
    for line in io.load_file(bed_file):
        record = line.strip()
        # A trailing newline at the end of the file or a header line would otherwise fail on the column lookups
        # below, so skip anything that is not a feature record.
        if not record or record.startswith("#") or record.split(maxsplit=1)[0] in {"track", "browser"}:
            continue
        contents = record.split("\t")
        transcript_id = contents[3].replace("_intron", "")
        coordinates[transcript_id].append(
            (
                contents[0],
                int(contents[1]),
                int(contents[2]),
                transcript_id,
                contents[5],
            )
        )
    return coordinates
42 changes: 42 additions & 0 deletions tests/test_isoslam.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Tests for the isoslam module."""

from pathlib import Path
from typing import Any

import pytest # type: ignore[import-not-found]

from isoslam import isoslam

# Anchor on the location of this file (tests/test_isoslam.py) rather than the current working directory so that
# the test resources are found regardless of the directory pytest is invoked from.
BASE_DIR = Path(__file__).resolve().parent.parent
RESOURCES = BASE_DIR / "tests" / "resources"


@pytest.mark.parametrize(
    ("bed_file", "expected_transcript"),
    [
        pytest.param(  # type: ignore[misc]
            RESOURCES / "bed" / "test_coding_introns.bed",
            {
                "ENST00000442898": [
                    ("9", 14940, 15080, "ENST00000442898", "-"),
                    ("9", 15149, 15908, "ENST00000442898", "-"),
                    ("9", 16061, 16717, "ENST00000442898", "-"),
                    ("9", 16876, 16964, "ENST00000442898", "-"),
                    ("9", 17166, 17343, "ENST00000442898", "-"),
                    ("9", 17479, 17718, "ENST00000442898", "-"),
                    ("9", 17855, 18027, "ENST00000442898", "-"),
                    ("9", 18174, 18380, "ENST00000442898", "-"),
                    ("9", 18492, 24850, "ENST00000442898", "-"),
                    ("9", 25004, 29601, "ENST00000442898", "-"),
                ]
            },
            id="bed coding introns",
        ),
    ],
)
def test_isoslam_extract_transcripts(
    bed_file: str | Path,
    expected_transcript: dict[Any, list[tuple[Any, int, int, Any, Any]]],
) -> None:
    """
    Test extraction of transcript data from bed file using extract_transcripts().

    Parameters
    ----------
    bed_file : str | Path
        Path to the `.bed` file passed to ``extract_transcripts()``.
    expected_transcript : dict[Any, list[tuple[Any, int, int, Any, Any]]]
        Dictionary the extraction is expected to return, compared with ``==``.
    """
    assert isoslam.extract_transcripts(bed_file) == expected_transcript

0 comments on commit e2f51dc

Please sign in to comment.