Skip to content

Commit

Permalink
Merge pull request #654 from biolink/go-site-2081-gorule-0000006-iep-and-hep--restricted-usage
Browse files Browse the repository at this point in the history

For #2081 repair goid, with/from and extension before running gorule …
  • Loading branch information
mugitty authored Oct 25, 2023
2 parents cca00e7 + d6d6a3a commit dc9a62f
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 49 deletions.
90 changes: 42 additions & 48 deletions ontobio/io/gafparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,46 @@ def parse_line(self, line):
if self.gaf_version() == "2.1" and (vals[3] == "" or vals[3] == "NOT") and self.config.ontology:
assoc = self.upgrade_empty_qualifier(assoc)

# Check for valid GO id and repair if necessary
split_line = assocparser.SplitLine(line=line, values=vals, taxon=str(assoc.object.taxon))
if not self._validate_id(str(assoc.object.id), split_line, context=ANNOTATION):
print("skipping because {} not validated!".format(assoc.object.id))
return assocparser.ParseResult(line, [], True)

valid_goid = self._validate_ontology_class_id(str(assoc.object.id), split_line)
if valid_goid is None:
return assocparser.ParseResult(line, [], True)
assoc.object.id = association.Curie.from_str(valid_goid)

references = self.validate_curie_ids(assoc.evidence.has_supporting_reference, split_line)
if references is None:
# Reporting occurs in above function call
return assocparser.ParseResult(line, [], True)

# With/From
for wf in assoc.evidence.with_support_from:
validated = self.validate_curie_ids(wf.elements, split_line)
if validated is None:
return assocparser.ParseResult(line, [], True)
with_support_from = self._unroll_withfrom_and_replair_obsoletes(split_line, 'gaf')
if with_support_from is None:
return assocparser.ParseResult(line, [], True)
assoc.evidence.with_support_from = with_support_from

# Extension
# Repair, if possible, any GO terms in the extensions that may be obsolete
if (0 < len(assoc.object_extensions)):
for ext in assoc.object_extensions:
validated = self.validate_curie_ids([e.term for e in ext.elements], split_line)
if validated is None:
return assocparser.ParseResult(line, [], True)
repaired = self._repair_extensions(assoc.object_extensions, split_line)
if repaired is None:
assoc.object_extensions = []
return assocparser.ParseResult(line, [], True)
assoc.object_extensions = repaired


## Run GO Rules, save split values into individual variables
# print("Config is {}".format(self.config.__dict__.keys()))
go_rule_results = qc.test_go_rules(assoc, self.config, group=self.group)
Expand All @@ -220,7 +260,7 @@ def parse_line(self, line):
msg="Passing Rule", rule=int(rule.id.split(":")[1]))

assoc = go_rule_results.annotation # type: association.GoAssociation
split_line = assocparser.SplitLine(line=line, values=vals, taxon=str(assoc.object.taxon))
#split_line = assocparser.SplitLine(line=line, values=vals, taxon=str(assoc.object.taxon))

if self.config.group_idspace is not None and assoc.provided_by not in self.config.group_idspace:
self.report.warning(line, Report.INVALID_ID, assoc.provided_by,
Expand All @@ -240,53 +280,7 @@ def parse_line(self, line):
## --assigned_by
if not self._validate_id(str(assoc.subject.id), split_line, allowed_ids=self.config.entity_idspaces):
return assocparser.ParseResult(line, [], True)

# Using a given gpi file to validate the gene object
# if self.gpi is not None:
# entity = self.gpi.get(str(assoc.subject.id), None)
# if entity is not None:
# assoc.subject.label = entity["symbol"]
# assoc.subject.fullname = entity["name"]
# assoc.subject.synonyms = entity["synonyms"].split("|")
# assoc.subject.type = entity["type"]

if not self._validate_id(str(assoc.object.id), split_line, context=ANNOTATION):
print("skipping because {} not validated!".format(assoc.object.id))
return assocparser.ParseResult(line, [], True)

valid_goid = self._validate_ontology_class_id(str(assoc.object.id), split_line)
if valid_goid is None:
return assocparser.ParseResult(line, [], True)
assoc.object.id = association.Curie.from_str(valid_goid)

references = self.validate_curie_ids(assoc.evidence.has_supporting_reference, split_line)
if references is None:
# Reporting occurs in above function call
return assocparser.ParseResult(line, [], True)

# With/From
for wf in assoc.evidence.with_support_from:
validated = self.validate_curie_ids(wf.elements, split_line)
if validated is None:
return assocparser.ParseResult(line, [], True)
with_support_from = self._unroll_withfrom_and_replair_obsoletes(split_line, 'gaf')
if with_support_from is None:
return assocparser.ParseResult(line, [], True)
assoc.evidence.with_support_from = with_support_from

# Extension
# Repair, if possible, any GO terms in the extensions that may be obsolete
if (0 < len(assoc.object_extensions)):
for ext in assoc.object_extensions:
validated = self.validate_curie_ids([e.term for e in ext.elements], split_line)
if validated is None:
return assocparser.ParseResult(line, [], True)
repaired = self._repair_extensions(assoc.object_extensions, split_line)
if repaired is None:
assoc.object_extensions = []
return assocparser.ParseResult(line, [], True)
assoc.object_extensions = repaired


# validation
self._validate_symbol(assoc.subject.label, split_line)

Expand Down
54 changes: 54 additions & 0 deletions tests/resources/obsolete.json
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,60 @@
"type" : "CLASS",
"lbl" : "ferric iron import across cell outer membrane"
}, {
"id" : "http://purl.obolibrary.org/obo/GO_0006758",
"meta" : {
"basicPropertyValues" : [ {
"pred" : "http://purl.obolibrary.org/obo/IAO_0100001",
"val" : "http://purl.obolibrary.org/obo/GO_0006754"
}, {
"pred" : "http://purl.obolibrary.org/obo/IAO_0000231",
"val" : "http://purl.obolibrary.org/obo/IAO_0000227"
} ],
"deprecated" : true
},
"type" : "CLASS"
}, {
"id" : "http://purl.obolibrary.org/obo/GO_0006754",
"meta" : {
"definition" : {
"val" : "The chemical reactions and pathways resulting in the formation of ATP, adenosine 5'-triphosphate, a universally important coenzyme and enzyme regulator.",
"xrefs" : [ "GOC:go_curators", "ISBN:0198506732" ]
},
"synonyms" : [ {
"pred" : "hasExactSynonym",
"val" : "ATP anabolism",
"xrefs" : [ ]
}, {
"pred" : "hasNarrowSynonym",
"val" : "ATP regeneration",
"xrefs" : [ ]
}, {
"pred" : "hasExactSynonym",
"val" : "ATP synthesis",
"xrefs" : [ ]
}, {
"pred" : "hasExactSynonym",
"val" : "ATP biosynthesis",
"xrefs" : [ ]
}, {
"pred" : "hasExactSynonym",
"val" : "ATP formation",
"xrefs" : [ ]
} ],
"basicPropertyValues" : [ {
"pred" : "http://www.geneontology.org/formats/oboInOwl#hasAlternativeId",
"val" : "GO:0006759"
}, {
"pred" : "http://www.geneontology.org/formats/oboInOwl#hasAlternativeId",
"val" : "GO:0006758"
}, {
"pred" : "http://www.geneontology.org/formats/oboInOwl#hasOBONamespace",
"val" : "biological_process"
} ]
},
"type" : "CLASS",
"lbl" : "ATP biosynthetic process"
}, {
"id" : "http://purl.obolibrary.org/obo/GO_0016458",
"meta" : {
"definition" : {
Expand Down
3 changes: 2 additions & 1 deletion tests/test_local_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,4 +256,5 @@ def test_obsolete():
assert ont.replaced_by('GO:1') == ['GO:2']
assert ont.replaced_by('GO:4') == ['GO:3']
assert ont.replaced_by('GO:0005913') == ['GO:0005912']
assert n_obs == 4
assert ont.replaced_by('GO:0006758') == ['GO:0006754']
assert n_obs == 5
12 changes: 12 additions & 0 deletions tests/test_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from ontobio.io import gaference
from ontobio.io import assocparser
from ontobio.io import gafparser
from ontobio.io.gafparser import GafParser
from ontobio.io import gpadparser
from ontobio import ontol, ontol_factory, ecomap

Expand Down Expand Up @@ -117,6 +118,17 @@ def test_go_rule_06():
test_result = qc.GoRule06().test(assoc, all_rules_config(ontology=ontology))
assert test_result.result_type == qc.ResultType.ERROR

# Ensure an obsoleted GO id is repaired with its alternate id before GoRule06 is run
line = ["GeneDB", "PF3D7_0507500", "SUB1", "enables", "GO:0006758", "PMID:12764150", "IEP", "PANTHER:PTN000623979|TAIR:locus:2099478", "C", "GORULE:0000006-6", "protease 1", "gene", "NCBITaxon:36329", "20090624", "GeneDB", "", ""]
obs_ontology = ontol_factory.OntologyFactory().create("tests/resources/obsolete.json")
p = GafParser(config=assocparser.AssocParserConfig(ontology=obs_ontology, rule_set = assocparser.RuleSet.ALL))
p.version = "2.2"
parsed = p.parse_line("\t".join(line))
assoc = parsed.associations[0]
assert assoc.object.id == Curie.from_str("GO:0006754")
test_result = qc.GoRule06().test(assoc, all_rules_config(ontology=obs_ontology))
assert test_result.result_type == qc.ResultType.PASS

def test_go_rule_07():

assoc = make_annotation(goid="GO:0003824", evidence="IPI").associations[0]
Expand Down

0 comments on commit dc9a62f

Please sign in to comment.