Skip to content

Commit

Permalink
Fix how frame getter/setter attribute is handled
Browse files — browse the repository at this point in the history
  • Loading branch information
jpjarnoux committed Oct 18, 2024
1 parent 760b0f0 commit a9a251f
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 22 deletions.
56 changes: 34 additions & 22 deletions ppanggolin/genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ppanggolin.metadata import MetaFeatures
from ppanggolin.utils import get_consecutive_region_positions


class Feature(MetaFeatures):
"""This is a general class representation of Gene, RNA
Expand Down Expand Up @@ -74,9 +75,10 @@ def __len__(self) -> int:
"""

try:
return sum([(stop - start +1) for start, stop in self.coordinates ])
return sum([(stop - start + 1) for start, stop in self.coordinates])
except TypeError:
raise ValueError(f"Coordinates of gene {self} have not been defined. Getting its length is then impossible.")
raise ValueError(
f"Coordinates of gene {self} have not been defined. Getting its length is then impossible.")

@property
def has_joined_coordinates(self) -> bool:
Expand Down Expand Up @@ -141,7 +143,7 @@ def contig(self, contig: Contig):
self._contig = contig

def fill_annotations(self, start: int, stop: int, strand: str, gene_type: str = "", name: str = "",
product: str = "", local_identifier: str = "", coordinates: List[Tuple[int]] = None):
product: str = "", local_identifier: str = "", coordinates: List[Tuple[int, int]] = None):
"""
Fill general annotation for child classes
Expand Down Expand Up @@ -173,21 +175,25 @@ def fill_annotations(self, start: int, stop: int, strand: str, gene_type: str =
if not isinstance(product, str):
raise TypeError(f"Product should be str. Got {type(product)} instead in {self} from {self.organism}.")
if not isinstance(local_identifier, str):
raise TypeError(f"Local identifier should be str. Got {type(local_identifier)} instead in {self} from {self.organism}.")
raise TypeError(
f"Local identifier should be str. Got {type(local_identifier)} instead in {self} from {self.organism}.")
if strand not in ["+", "-"]:
raise ValueError(f"Strand should be '+' or '-'. Got {strand} instead in {self} from {self.organism}.")
if not isinstance(coordinates, list):
raise TypeError(f"Coordinates should be of type list. Got {type(coordinates)} instead in {self} from {self.organism}.")
raise TypeError(
f"Coordinates should be of type list. Got {type(coordinates)} instead in {self} from {self.organism}.")

for start_i, stop_i in coordinates:
if not isinstance(start_i, int):
raise TypeError(f"Start should be int. Got {type(start_i)} instead in {self} from {self.organism}.")
if not isinstance(stop_i, int):
raise TypeError(f"Stop should be int. Got {type(stop_i)} instead in {self} from {self.organism}.")
if stop_i < start_i:
raise ValueError(f"Wrong coordinates: {coordinates}. Start ({start_i}) should not be greater than stop ({stop_i}) in {self} from {self.organism}.")
raise ValueError(
f"Wrong coordinates: {coordinates}. Start ({start_i}) should not be greater than stop ({stop_i}) in {self} from {self.organism}.")
if start_i < 1 or stop_i < 1:
raise ValueError(f"Wrong coordinates: {coordinates}. Start ({start_i}) and stop ({stop_i}) should be greater than 0 in {self} from {self.organism}.")
raise ValueError(
f"Wrong coordinates: {coordinates}. Start ({start_i}) and stop ({stop_i}) should be greater than 0 in {self} from {self.organism}.")

self.start = start
self.stop = stop
Expand Down Expand Up @@ -222,7 +228,8 @@ def add_sequence(self, sequence):
:raise AssertionError: Sequence must be a string
"""
assert isinstance(sequence, str), f"'str' type was expected for dna sequence but you provided a '{type(sequence)}' type object"
assert isinstance(sequence,
str), f"'str' type was expected for dna sequence but you provided a '{type(sequence)}' type object"

self.dna = sequence

Expand All @@ -249,6 +256,7 @@ def stop_relative_to(self, gene):
if gene.start > self.stop:
return self.stop + self.contig.length


class RNA(Feature):
"""Save RNA from genome as an Object with some information for Pangenome
Expand Down Expand Up @@ -286,8 +294,8 @@ def __init__(self, gene_id: str):
self._RGP = None
self.genetic_code = None
self.protein = None
self.is_partial = False # is the gene a partial gene ?
self.frame = 0 # One of '0', '1' or '2'. '0' indicates that the first base of the feature is the first base of a codon, '1' that the second base is the first base of a codon, and so on..
self.is_partial = False # is the gene a partial gene ?
self._frame = None # One of '0', '1' or '2'. '0' indicates that the first base of the feature is the first base of a codon, '1' that the second base is the first base of a codon, and so on..

@property
def family(self):
Expand Down Expand Up @@ -351,7 +359,8 @@ def module(self):
"""
return self.family.module

def fill_annotations(self, position: int = None, genetic_code: int = 11, is_partial:bool = False, frame:int = 0, **kwargs):
def fill_annotations(self, position: int = None, genetic_code: int = 11, is_partial: bool = False, frame: int = 0,
**kwargs):
"""Fill Gene annotation provide by PPanGGOLiN dependencies
:param position: Gene localization in genome
Expand All @@ -368,17 +377,14 @@ def fill_annotations(self, position: int = None, genetic_code: int = 11, is_part
raise TypeError("position should be an integer")
if not isinstance(genetic_code, int):
raise TypeError("Genetic code should be an integer")

if frame not in [0,1,2]:
raise ValueError("Frame should be equal to 0, 1 or 2.")


if not isinstance(is_partial, bool):
raise TypeError("partial code should be an boolean")

self.position = position
self.genetic_code = genetic_code
self.is_partial = is_partial
self._frame = frame
self.frame = frame

def add_protein(self, protein: str):
"""Add a protein sequence corresponding to translated gene
Expand All @@ -397,6 +403,7 @@ def frame(self) -> int:
Get the frame of the gene
"""
assert self._frame is not None, "frame is already set and should not be set another time."

return self._frame

Expand All @@ -406,11 +413,14 @@ def frame(self, frame: int):
:param contig_len: length of the contig
"""
if frame not in [0,1,2]:
assert self._frame is None, "frame is already set and should not be set another time."

if frame not in [0, 1, 2]:
raise ValueError("Frame should be equal to 0, 1 or 2.")

self._frame = frame


class Contig(MetaFeatures):
"""
Describe the contig content and some information
Expand Down Expand Up @@ -776,7 +786,6 @@ def modules(self):
modules.add(module)
yield from modules


def get_ordered_consecutive_genes(self, genes: Iterable[Gene]) -> List[List[Gene]]:
"""
Order the given genes considering the circularity of the contig.
Expand All @@ -787,12 +796,15 @@ def get_ordered_consecutive_genes(self, genes: Iterable[Gene]) -> List[List[Gene
gene_positions = [gene.position for gene in genes]

# Determine consecutive region positions
consecutive_region_positions = get_consecutive_region_positions(region_positions=gene_positions, contig_gene_count=self.number_of_genes)
consecutive_region_positions = get_consecutive_region_positions(region_positions=gene_positions,
contig_gene_count=self.number_of_genes)

consecutive_genes_lists = [[self[position] for position in consecutive_positions] for consecutive_positions in consecutive_region_positions]
consecutive_genes_lists = [[self[position] for position in consecutive_positions] for consecutive_positions in
consecutive_region_positions]

return consecutive_genes_lists


class Organism(MetaFeatures):
"""
Describe the Genome content and some information
Expand Down
50 changes: 50 additions & 0 deletions tests/test_genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ def test_create_gene_object(self, gene):
assert gene._RGP is None
assert gene.genetic_code is None
assert gene.protein is None
assert gene.is_partial is False
assert gene._frame is None

def test_fill_annotations(self, gene):
"""Tests that Gene annotations can be filled with valid parameters
Expand All @@ -238,6 +240,54 @@ def test_fill_annotations_type_error(self, gene):
with pytest.raises(TypeError):
gene.fill_annotations(start=1, stop=10, strand='+', position=10, genetic_code="4")

@pytest.mark.parametrize("frame", [0, 1, 2])
def test_set_frame(self, frame):
    """Check that assigning a valid frame stores it on the gene.

    :param frame: Frame value supplied by the parametrization (0, 1 or 2)
    """
    fresh_gene = Gene('gene')
    fresh_gene.frame = frame
    # Inspect the backing attribute directly so the getter is not involved.
    stored = fresh_gene._frame
    assert stored == frame

@pytest.mark.parametrize("frame", [0, 1, 2])
def test_get_frame(self, frame):
    """Check that a frame which has been set can be read back.

    :param frame: Frame value supplied by the parametrization (0, 1 or 2)
    """
    fresh_gene = Gene('gene')
    fresh_gene.frame = frame
    # Read through the public attribute this time, not the private one.
    retrieved = fresh_gene.frame
    assert retrieved == frame

def test_raise_assertion_error_if_frame_not_set(self):
    """Check that reading the frame before it has been set raises an AssertionError
    """
    unset_gene = Gene('gene')
    with pytest.raises(AssertionError):
        # Only the attribute access matters; its value is discarded.
        getattr(unset_gene, "frame")

def test_raise_assertion_error_if_frame_already_set(self):
    """Check that assigning the frame a second time raises an AssertionError
    """
    first_value, second_value = 1, 2
    locked_gene = Gene('gene')
    locked_gene.frame = first_value
    # The second assignment is the one expected to fail.
    with pytest.raises(AssertionError):
        locked_gene.frame = second_value

@pytest.mark.parametrize("frame", [3, "1", 1.5])
def test_raise_value_error_if_frame_not_0_1_or_2(self, frame):
    """Check that assigning a frame other than 0, 1 or 2 raises a ValueError.

    :param frame: Invalid frame value supplied by the parametrization
    """
    fresh_gene = Gene('gene')
    with pytest.raises(ValueError):
        # Equivalent to ``fresh_gene.frame = frame``.
        setattr(fresh_gene, "frame", frame)

@pytest.mark.parametrize("frame", [0, 1, 2])
def test_fill_partial_gene(self, frame):
    """Check that fill_annotations records the partial flag and the frame.

    :param frame: Frame value supplied by the parametrization (0, 1 or 2)
    """
    annotations = dict(start=1, stop=10, strand='+', is_partial=True, frame=frame)
    partial_gene = Gene('gene')
    partial_gene.fill_annotations(**annotations)
    assert partial_gene.is_partial is True
    assert partial_gene.frame == frame

def test_add_protein(self, gene):
"""Tests that a protein sequence can be added to a Gene object
"""
Expand Down

0 comments on commit a9a251f

Please sign in to comment.