Skip to content

Commit

Permalink
Merge pull request #51 from geneontology/feb_updates
Browse files (browse the repository at this point in the history)
lint and clean
  • Loading branch information
sierra-moxon authored Mar 18, 2024
2 parents df272fe + 313a1ef commit ac89c94
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 23 deletions.
15 changes: 4 additions & 11 deletions src/gopreprocess/file_processors/gaf_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,9 @@ def parse_ortho_gaf(self):
continue # remove annotations that don't have a subject in the namespaces we're interested in
if str(source_assoc.evidence.type) not in experimental_evidence_codes:
continue
if (
source_assoc.provided_by == self.taxon_to_provider[self.target_taxon]
or source_assoc.provided_by == "GO_Central"
):
if source_assoc.provided_by == self.taxon_to_provider[self.target_taxon] or source_assoc.provided_by == "GO_Central":
continue # remove self-annotations
has_reference = any(
reference.namespace == "PMID" for reference in source_assoc.evidence.has_supporting_reference
)
has_reference = any(reference.namespace == "PMID" for reference in source_assoc.evidence.has_supporting_reference)
if not has_reference:
counter = counter + 1
if str(source_assoc.object.id) in ["GO:0005515", "GO:0005488"]:
Expand All @@ -132,9 +127,7 @@ def parse_ortho_gaf(self):
# if it's in the mapped dictionary, then we can replace the UniProt identifier with the
# HGNC identifier, formatting that as a Curie with separate Namespace and ID fields.
mapped_id = self.uniprot_to_hgnc_map[str(source_assoc.subject.id)]
source_assoc.subject.id = Curie(
namespace=mapped_id.split(":")[0], identity=mapped_id.split(":")[1]
)
source_assoc.subject.id = Curie(namespace=mapped_id.split(":")[0], identity=mapped_id.split(":")[1])
self.convertible_annotations.append(source_assoc)
return self.convertible_annotations

Expand Down Expand Up @@ -175,4 +168,4 @@ def parse_p2g_gaf(self):
if str(source_assoc.object.id) in ["GO:0005575", "GO:0008150", "GO:0003674"]:
continue # remove root terms
self.convertible_p2g_annotations.append(source_assoc)
return self.convertible_p2g_annotations
return self.convertible_p2g_annotations
18 changes: 6 additions & 12 deletions src/gopreprocess/ortho_annotation_creation_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@

import collections
import copy
import click
import sys
from datetime import datetime
from typing import List
import sys

import click
import pandas as pd
import pystow
from gopreprocess.file_processors.ontology_processor import get_GO_aspector
Expand Down Expand Up @@ -45,7 +46,6 @@ def dump_converted_annotations(converted_target_annotations: List[List[str]], so
:type target_taxon: str
"""

# using pandas in order to take advantage of pystow in terms of file location and handling
df = pd.DataFrame(converted_target_annotations)
df = df.applymap(convert_curie_to_string)
Expand Down Expand Up @@ -197,7 +197,6 @@ def convert_annotations(self) -> None:

source_genes = OrthoProcessor(target_genes, ortho_path, self.target_taxon, self.source_taxon).genes


transformed = {}
for key, values in source_genes.items():
for value in values:
Expand Down Expand Up @@ -249,9 +248,7 @@ def convert_annotations(self) -> None:
converted_target_annotations.append(new_annotation.to_gaf_2_2_tsv())

if converted_target_annotations:
dump_converted_annotations(converted_target_annotations,
source_taxon=self.source_taxon,
target_taxon=self.target_taxon)
dump_converted_annotations(converted_target_annotations, source_taxon=self.source_taxon, target_taxon=self.target_taxon)
else:
print("FAIL!: no annotations to dump!")
click.echo("No annotations were converted.")
Expand Down Expand Up @@ -291,9 +288,7 @@ def generate_annotation(

if str(annotation.subject.id) in source_genes.keys():
for gene in source_genes[str(annotation.subject.id)]:
if (gene in transformed_source_genes
and len(transformed_source_genes[gene]) > 1
and go_aspector.is_biological_process(str(annotation.object.id))):
if gene in transformed_source_genes and len(transformed_source_genes[gene]) > 1 and go_aspector.is_biological_process(str(annotation.object.id)):
output = (
"NON_1TO1_BP"
+ str(annotation.subject.id)
Expand All @@ -318,8 +313,7 @@ def generate_annotation(
else:
new_annotation.evidence.with_support_from = [ConjunctiveSet(elements=[str(annotation.subject.id)])]
print("no HGNC to UniProt map", str(annotation.subject.id))
new_annotation.evidence.has_supporting_reference = [Curie(namespace="GO_REF",
identity=self.ortho_reference)]
new_annotation.evidence.has_supporting_reference = [Curie(namespace="GO_REF", identity=self.ortho_reference)]
# If there is only one human ortholog of the mouse gene and the annotation is not a biological
# process, then we add it; otherwise we skip it. The evidence type is "inferred from sequence similarity".
new_annotation.evidence.type = Curie(namespace="ECO", identity=iso_eco_code.split(":")[1])
Expand Down

0 comments on commit ac89c94

Please sign in to comment.