diff --git a/src/gopreprocess/file_processors/gaf_processor.py b/src/gopreprocess/file_processors/gaf_processor.py index 656baa5..a62b529 100644 --- a/src/gopreprocess/file_processors/gaf_processor.py +++ b/src/gopreprocess/file_processors/gaf_processor.py @@ -110,14 +110,9 @@ def parse_ortho_gaf(self): continue # remove annotations that don't have a subject in the namespaces we're interested in if str(source_assoc.evidence.type) not in experimental_evidence_codes: continue - if ( - source_assoc.provided_by == self.taxon_to_provider[self.target_taxon] - or source_assoc.provided_by == "GO_Central" - ): + if source_assoc.provided_by == self.taxon_to_provider[self.target_taxon] or source_assoc.provided_by == "GO_Central": continue # remove self-annotations - has_reference = any( - reference.namespace == "PMID" for reference in source_assoc.evidence.has_supporting_reference - ) + has_reference = any(reference.namespace == "PMID" for reference in source_assoc.evidence.has_supporting_reference) if not has_reference: counter = counter + 1 if str(source_assoc.object.id) in ["GO:0005515", "GO:0005488"]: @@ -132,9 +127,7 @@ def parse_ortho_gaf(self): # if it's in the mapped dictionary, then we can replace the UniProt identifier with the # HGNC identifier, formatting that as a Curie with separate Namespace and ID fields. mapped_id = self.uniprot_to_hgnc_map[str(source_assoc.subject.id)] - source_assoc.subject.id = Curie( - namespace=mapped_id.split(":")[0], identity=mapped_id.split(":")[1] - ) + source_assoc.subject.id = Curie(namespace=mapped_id.split(":")[0], identity=mapped_id.split(":")[1]) self.convertible_annotations.append(source_assoc) return self.convertible_annotations @@ -175,4 +168,4 @@ def parse_p2g_gaf(self): if str(source_assoc.object.id) in ["GO:0005575", "GO:0008150", "GO:0003674"]: continue # remove root terms self.convertible_p2g_annotations.append(source_assoc) - return self.convertible_p2g_annotations \ No newline at end of file + return self.convertible_p2g_annotations diff --git a/src/gopreprocess/ortho_annotation_creation_controller.py b/src/gopreprocess/ortho_annotation_creation_controller.py index feae943..d6f5f93 100644 --- a/src/gopreprocess/ortho_annotation_creation_controller.py +++ b/src/gopreprocess/ortho_annotation_creation_controller.py @@ -7,10 +7,11 @@ import collections import copy -import click +import sys from datetime import datetime from typing import List -import sys + +import click import pandas as pd import pystow from gopreprocess.file_processors.ontology_processor import get_GO_aspector @@ -45,7 +46,6 @@ def dump_converted_annotations(converted_target_annotations: List[List[str]], so :type target_taxon: str """ - # using pandas in order to take advantage of pystow in terms of file location and handling df = pd.DataFrame(converted_target_annotations) df = df.applymap(convert_curie_to_string) @@ -197,7 +197,6 @@ def convert_annotations(self) -> None: source_genes = OrthoProcessor(target_genes, ortho_path, self.target_taxon, self.source_taxon).genes - transformed = {} for key, values in source_genes.items(): for value in values: @@ -249,9 +248,7 @@ def convert_annotations(self) -> None: converted_target_annotations.append(new_annotation.to_gaf_2_2_tsv()) if converted_target_annotations: - dump_converted_annotations(converted_target_annotations, - source_taxon=self.source_taxon, - target_taxon=self.target_taxon) + dump_converted_annotations(converted_target_annotations, source_taxon=self.source_taxon, target_taxon=self.target_taxon) else: print("FAIL!: no annotations to dump!") click.echo("No annotations were converted.") @@ -291,9 +288,7 @@ def generate_annotation( if str(annotation.subject.id) in source_genes.keys(): for gene in source_genes[str(annotation.subject.id)]: - if (gene in transformed_source_genes - and len(transformed_source_genes[gene]) > 1 - and go_aspector.is_biological_process(str(annotation.object.id))): + if gene in transformed_source_genes and len(transformed_source_genes[gene]) > 1 and go_aspector.is_biological_process(str(annotation.object.id)): output = ( "NON_1TO1_BP" + str(annotation.subject.id) @@ -318,8 +313,7 @@ def generate_annotation( else: new_annotation.evidence.with_support_from = [ConjunctiveSet(elements=[str(annotation.subject.id)])] print("no HGNC to UniProt map", str(annotation.subject.id)) - new_annotation.evidence.has_supporting_reference = [Curie(namespace="GO_REF", - identity=self.ortho_reference)] + new_annotation.evidence.has_supporting_reference = [Curie(namespace="GO_REF", identity=self.ortho_reference)] # if there is only one human ortholog of the mouse gene and the annotation is not a biological # process, then we add it, else we skip it. inferred from sequence similarity new_annotation.evidence.type = Curie(namespace="ECO", identity=iso_eco_code.split(":")[1])