Merge pull request #51 from geneontology/feb_updates

lint and clean
geneontology · Mar 18, 2024 · ac89c94
2 parents df272fe + 313a1ef
commit ac89c94
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 23 deletions.
diff --git a/src/gopreprocess/file_processors/gaf_processor.py b/src/gopreprocess/file_processors/gaf_processor.py
@@ -110,14 +110,9 @@ def parse_ortho_gaf(self):
                         continue  # remove annotations that don't have a subject in the namespaces we're interested in
                     if str(source_assoc.evidence.type) not in experimental_evidence_codes:
                         continue
-                    if (
-                        source_assoc.provided_by == self.taxon_to_provider[self.target_taxon]
-                        or source_assoc.provided_by == "GO_Central"
-                    ):
+                    if source_assoc.provided_by == self.taxon_to_provider[self.target_taxon] or source_assoc.provided_by == "GO_Central":
                         continue  # remove self-annotations
-                    has_reference = any(
-                        reference.namespace == "PMID" for reference in source_assoc.evidence.has_supporting_reference
-                    )
+                    has_reference = any(reference.namespace == "PMID" for reference in source_assoc.evidence.has_supporting_reference)
                     if not has_reference:
                         counter = counter + 1
                     if str(source_assoc.object.id) in ["GO:0005515", "GO:0005488"]:
@@ -132,9 +127,7 @@ def parse_ortho_gaf(self):
                             # if it's in the mapped dictionary, then we can replace the UniProt identifier with the
                             # HGNC identifier, formatting that as a Curie with separate Namespace and ID fields.
                             mapped_id = self.uniprot_to_hgnc_map[str(source_assoc.subject.id)]
-                            source_assoc.subject.id = Curie(
-                                namespace=mapped_id.split(":")[0], identity=mapped_id.split(":")[1]
-                            )
+                            source_assoc.subject.id = Curie(namespace=mapped_id.split(":")[0], identity=mapped_id.split(":")[1])
                     self.convertible_annotations.append(source_assoc)
         return self.convertible_annotations
 
@@ -175,4 +168,4 @@ def parse_p2g_gaf(self):
                     if str(source_assoc.object.id) in ["GO:0005575", "GO:0008150", "GO:0003674"]:
                         continue  # remove root terms
                     self.convertible_p2g_annotations.append(source_assoc)
-        return self.convertible_p2g_annotations
+        return self.convertible_p2g_annotations
diff --git a/src/gopreprocess/ortho_annotation_creation_controller.py b/src/gopreprocess/ortho_annotation_creation_controller.py
@@ -7,10 +7,11 @@
 
 import collections
 import copy
-import click
+import sys
 from datetime import datetime
 from typing import List
-import sys
+
+import click
 import pandas as pd
 import pystow
 from gopreprocess.file_processors.ontology_processor import get_GO_aspector
@@ -45,7 +46,6 @@ def dump_converted_annotations(converted_target_annotations: List[List[str]], so
     :type target_taxon: str
 
     """
-
     # using pandas in order to take advantage of pystow in terms of file location and handling
     df = pd.DataFrame(converted_target_annotations)
     df = df.applymap(convert_curie_to_string)
@@ -197,7 +197,6 @@ def convert_annotations(self) -> None:
 
         source_genes = OrthoProcessor(target_genes, ortho_path, self.target_taxon, self.source_taxon).genes
 
-
         transformed = {}
         for key, values in source_genes.items():
             for value in values:
@@ -249,9 +248,7 @@ def convert_annotations(self) -> None:
                     converted_target_annotations.append(new_annotation.to_gaf_2_2_tsv())
 
         if converted_target_annotations:
-            dump_converted_annotations(converted_target_annotations,
-                                       source_taxon=self.source_taxon,
-                                       target_taxon=self.target_taxon)
+            dump_converted_annotations(converted_target_annotations, source_taxon=self.source_taxon, target_taxon=self.target_taxon)
         else:
             print("FAIL!: no annotations to dump!")
             click.echo("No annotations were converted.")
@@ -291,9 +288,7 @@ def generate_annotation(
 
         if str(annotation.subject.id) in source_genes.keys():
             for gene in source_genes[str(annotation.subject.id)]:
-                if (gene in transformed_source_genes
-                        and len(transformed_source_genes[gene]) > 1
-                        and go_aspector.is_biological_process(str(annotation.object.id))):
+                if gene in transformed_source_genes and len(transformed_source_genes[gene]) > 1 and go_aspector.is_biological_process(str(annotation.object.id)):
                     output = (
                         "NON_1TO1_BP"
                         + str(annotation.subject.id)
@@ -318,8 +313,7 @@ def generate_annotation(
                     else:
                         new_annotation.evidence.with_support_from = [ConjunctiveSet(elements=[str(annotation.subject.id)])]
                         print("no HGNC to UniProt map", str(annotation.subject.id))
-                    new_annotation.evidence.has_supporting_reference = [Curie(namespace="GO_REF",
-                                                                              identity=self.ortho_reference)]
+                    new_annotation.evidence.has_supporting_reference = [Curie(namespace="GO_REF", identity=self.ortho_reference)]
                     # if there is only one human ortholog of the mouse gene and the annotation is not a biological
                     # process, then we add it, else we skip it. inferred from sequence similarity
                     new_annotation.evidence.type = Curie(namespace="ECO", identity=iso_eco_code.split(":")[1])