From a9a251fc214e6cf916dad26d51c97876843689bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Arnoux?= Date: Fri, 18 Oct 2024 10:22:22 +0200 Subject: [PATCH] Fix how frame getter/setter attribute is handled --- ppanggolin/genome.py | 56 +++++++++++++++++++++++++++----------------- tests/test_genome.py | 50 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 22 deletions(-) diff --git a/ppanggolin/genome.py b/ppanggolin/genome.py index eba6e423..83bf971c 100644 --- a/ppanggolin/genome.py +++ b/ppanggolin/genome.py @@ -12,6 +12,7 @@ from ppanggolin.metadata import MetaFeatures from ppanggolin.utils import get_consecutive_region_positions + class Feature(MetaFeatures): """This is a general class representation of Gene, RNA @@ -74,9 +75,10 @@ def __len__(self) -> int: """ try: - return sum([(stop - start +1) for start, stop in self.coordinates ]) + return sum([(stop - start + 1) for start, stop in self.coordinates]) except TypeError: - raise ValueError(f"Coordinates of gene {self} have not been defined. Getting its length is then impossible.") + raise ValueError( + f"Coordinates of gene {self} have not been defined. Getting its length is then impossible.") @property def has_joined_coordinates(self) -> bool: @@ -141,7 +143,7 @@ def contig(self, contig: Contig): self._contig = contig def fill_annotations(self, start: int, stop: int, strand: str, gene_type: str = "", name: str = "", - product: str = "", local_identifier: str = "", coordinates: List[Tuple[int]] = None): + product: str = "", local_identifier: str = "", coordinates: List[Tuple[int, int]] = None): """ Fill general annotation for child classes @@ -173,11 +175,13 @@ def fill_annotations(self, start: int, stop: int, strand: str, gene_type: str = if not isinstance(product, str): raise TypeError(f"Product should be str. Got {type(product)} instead in {self} from {self.organism}.") if not isinstance(local_identifier, str): - raise TypeError(f"Local identifier should be str. 
Got {type(local_identifier)} instead in {self} from {self.organism}.") + raise TypeError( + f"Local identifier should be str. Got {type(local_identifier)} instead in {self} from {self.organism}.") if strand not in ["+", "-"]: raise ValueError(f"Strand should be '+' or '-'. Got {strand} instead in {self} from {self.organism}.") if not isinstance(coordinates, list): - raise TypeError(f"Coordinates should be of type list. Got {type(coordinates)} instead in {self} from {self.organism}.") + raise TypeError( + f"Coordinates should be of type list. Got {type(coordinates)} instead in {self} from {self.organism}.") for start_i, stop_i in coordinates: if not isinstance(start_i, int): @@ -185,9 +189,11 @@ def fill_annotations(self, start: int, stop: int, strand: str, gene_type: str = if not isinstance(stop_i, int): raise TypeError(f"Stop should be int. Got {type(stop_i)} instead in {self} from {self.organism}.") if stop_i < start_i: - raise ValueError(f"Wrong coordinates: {coordinates}. Start ({start_i}) should not be greater than stop ({stop_i}) in {self} from {self.organism}.") + raise ValueError( + f"Wrong coordinates: {coordinates}. Start ({start_i}) should not be greater than stop ({stop_i}) in {self} from {self.organism}.") if start_i < 1 or stop_i < 1: - raise ValueError(f"Wrong coordinates: {coordinates}. Start ({start_i}) and stop ({stop_i}) should be greater than 0 in {self} from {self.organism}.") + raise ValueError( + f"Wrong coordinates: {coordinates}. 
Start ({start_i}) and stop ({stop_i}) should be greater than 0 in {self} from {self.organism}.") self.start = start self.stop = stop @@ -222,7 +228,8 @@ def add_sequence(self, sequence): :raise AssertionError: Sequence must be a string """ - assert isinstance(sequence, str), f"'str' type was expected for dna sequence but you provided a '{type(sequence)}' type object" + assert isinstance(sequence, + str), f"'str' type was expected for dna sequence but you provided a '{type(sequence)}' type object" self.dna = sequence @@ -249,6 +256,7 @@ def stop_relative_to(self, gene): if gene.start > self.stop: return self.stop + self.contig.length + class RNA(Feature): """Save RNA from genome as an Object with some information for Pangenome @@ -286,8 +294,8 @@ def __init__(self, gene_id: str): self._RGP = None self.genetic_code = None self.protein = None - self.is_partial = False # is the gene a partial gene ? - self.frame = 0 # One of '0', '1' or '2'. '0' indicates that the first base of the feature is the first base of a codon, '1' that the second base is the first base of a codon, and so on.. + self.is_partial = False # is the gene a partial gene ? + self._frame = None # None until set through the frame setter, then one of 0, 1 or 2. 0 indicates that the first base of the feature is the first base of a codon, 1 that the second base is the first base of a codon, and so on.
@property def family(self): @@ -351,7 +359,8 @@ def module(self): """ return self.family.module - def fill_annotations(self, position: int = None, genetic_code: int = 11, is_partial:bool = False, frame:int = 0, **kwargs): + def fill_annotations(self, position: int = None, genetic_code: int = 11, is_partial: bool = False, frame: int = 0, + **kwargs): """Fill Gene annotation provide by PPanGGOLiN dependencies :param position: Gene localization in genome @@ -368,17 +377,14 @@ def fill_annotations(self, position: int = None, genetic_code: int = 11, is_part raise TypeError("position should be an integer") if not isinstance(genetic_code, int): raise TypeError("Genetic code should be an integer") - - if frame not in [0,1,2]: - raise ValueError("Frame should be equal to 0, 1 or 2.") - + if not isinstance(is_partial, bool): raise TypeError("partial code should be an boolean") - + self.position = position self.genetic_code = genetic_code self.is_partial = is_partial - self._frame = frame + self.frame = frame def add_protein(self, protein: str): """Add a protein sequence corresponding to translated gene @@ -397,6 +403,7 @@ def frame(self) -> int: Get the frame of the gene """ + assert self._frame is not None, "frame has not been set yet and cannot be returned." return self._frame @@ -406,11 +413,14 @@ def frame(self, frame: int): :param contig_len: length of the contig """ - if frame not in [0,1,2]: + assert self._frame is None, "frame is already set and should not be set another time." + + if frame not in [0, 1, 2]: raise ValueError("Frame should be equal to 0, 1 or 2.") - + self._frame = frame + class Contig(MetaFeatures): """ Describe the contig content and some information @@ -776,7 +786,6 @@ def modules(self): modules.add(module) yield from modules - def get_ordered_consecutive_genes(self, genes: Iterable[Gene]) -> List[List[Gene]]: """ Order the given genes considering the circularity of the contig. 
@@ -787,12 +796,15 @@ def get_ordered_consecutive_genes(self, genes: Iterable[Gene]) -> List[List[Gene gene_positions = [gene.position for gene in genes] # Determine consecutive region positions - consecutive_region_positions = get_consecutive_region_positions(region_positions=gene_positions, contig_gene_count=self.number_of_genes) + consecutive_region_positions = get_consecutive_region_positions(region_positions=gene_positions, + contig_gene_count=self.number_of_genes) - consecutive_genes_lists = [[self[position] for position in consecutive_positions] for consecutive_positions in consecutive_region_positions] + consecutive_genes_lists = [[self[position] for position in consecutive_positions] for consecutive_positions in + consecutive_region_positions] return consecutive_genes_lists + class Organism(MetaFeatures): """ Describe the Genome content and some information diff --git a/tests/test_genome.py b/tests/test_genome.py index 6f519a74..9e29133f 100644 --- a/tests/test_genome.py +++ b/tests/test_genome.py @@ -222,6 +222,8 @@ def test_create_gene_object(self, gene): assert gene._RGP is None assert gene.genetic_code is None assert gene.protein is None + assert gene.is_partial is False + assert gene._frame is None def test_fill_annotations(self, gene): """Tests that Gene annotations can be filled with valid parameters @@ -238,6 +240,54 @@ def test_fill_annotations_type_error(self, gene): with pytest.raises(TypeError): gene.fill_annotations(start=1, stop=10, strand='+', position=10, genetic_code="4") + @pytest.mark.parametrize("frame", [0, 1, 2]) + def test_set_frame(self, frame): + """Tests that frame can be set + """ + gene = Gene('gene') + gene.frame = frame + assert gene._frame == frame + + @pytest.mark.parametrize("frame", [0, 1, 2]) + def test_get_frame(self, frame): + """Tests that frame can be retrieved + """ + gene = Gene('gene') + gene.frame = frame + assert gene.frame == frame + + def test_raise_assertion_error_if_frame_not_set(self): + """Tests that frame 
cannot be returned if it has not been set + """ + gene = Gene('gene') + with pytest.raises(AssertionError): + _ = gene.frame + + def test_raise_assertion_error_if_frame_already_set(self): + """Tests that frame cannot be set if it has already been set + """ + gene = Gene('gene') + gene.frame = 1 + with pytest.raises(AssertionError): + gene.frame = 2 + + @pytest.mark.parametrize("frame", [3, "1", 1.5]) + def test_raise_value_error_if_frame_not_0_1_or_2(self, frame): + """Tests that frame cannot be set with a value other than 0, 1 or 2 + """ + gene = Gene('gene') + with pytest.raises(ValueError): + gene.frame = frame + + @pytest.mark.parametrize("frame", [0, 1, 2]) + def test_fill_partial_gene(self, frame): + """Tests that Gene annotations can be filled with partial genes + """ + gene = Gene('gene') + gene.fill_annotations(start=1, stop=10, strand='+', is_partial=True, frame=frame) + assert gene.is_partial is True + assert gene.frame == frame + def test_add_protein(self, gene): """Tests that a protein sequence can be added to a Gene object """