diff --git a/ontobio/io/gafparser.py b/ontobio/io/gafparser.py index 04ddc6ad..d2be5037 100644 --- a/ontobio/io/gafparser.py +++ b/ontobio/io/gafparser.py @@ -201,6 +201,46 @@ def parse_line(self, line): if self.gaf_version() == "2.1" and (vals[3] == "" or vals[3] == "NOT") and self.config.ontology: assoc = self.upgrade_empty_qualifier(assoc) + # Check for valid GO id and repair if necessary + split_line = assocparser.SplitLine(line=line, values=vals, taxon=str(assoc.object.taxon)) + if not self._validate_id(str(assoc.object.id), split_line, context=ANNOTATION): + print("skipping because {} not validated!".format(assoc.object.id)) + return assocparser.ParseResult(line, [], True) + + valid_goid = self._validate_ontology_class_id(str(assoc.object.id), split_line) + if valid_goid is None: + return assocparser.ParseResult(line, [], True) + assoc.object.id = association.Curie.from_str(valid_goid) + + references = self.validate_curie_ids(assoc.evidence.has_supporting_reference, split_line) + if references is None: + # Reporting occurs in above function call + return assocparser.ParseResult(line, [], True) + + # With/From + for wf in assoc.evidence.with_support_from: + validated = self.validate_curie_ids(wf.elements, split_line) + if validated is None: + return assocparser.ParseResult(line, [], True) + with_support_from = self._unroll_withfrom_and_replair_obsoletes(split_line, 'gaf') + if with_support_from is None: + return assocparser.ParseResult(line, [], True) + assoc.evidence.with_support_from = with_support_from + + # Extension + # repair, if possible any GO terms in the extensions that may be obsolete + if (0 < len(assoc.object_extensions)): + for ext in assoc.object_extensions: + validated = self.validate_curie_ids([e.term for e in ext.elements], split_line) + if validated is None: + return assocparser.ParseResult(line, [], True) + repaired = self._repair_extensions(assoc.object_extensions, split_line) + if repaired is None: + assoc.object_extensions = [] + return assocparser.ParseResult(line, [], True) + assoc.object_extensions = repaired + + ## Run GO Rules, save split values into individual variables # print("Config is {}".format(self.config.__dict__.keys())) go_rule_results = qc.test_go_rules(assoc, self.config, group=self.group) @@ -220,7 +260,7 @@ def parse_line(self, line): msg="Passing Rule", rule=int(rule.id.split(":")[1])) assoc = go_rule_results.annotation # type: association.GoAssociation - split_line = assocparser.SplitLine(line=line, values=vals, taxon=str(assoc.object.taxon)) + #split_line = assocparser.SplitLine(line=line, values=vals, taxon=str(assoc.object.taxon)) if self.config.group_idspace is not None and assoc.provided_by not in self.config.group_idspace: self.report.warning(line, Report.INVALID_ID, assoc.provided_by, @@ -240,53 +280,7 @@ def parse_line(self, line): ## --assigned_by if not self._validate_id(str(assoc.subject.id), split_line, allowed_ids=self.config.entity_idspaces): return assocparser.ParseResult(line, [], True) - - # Using a given gpi file to validate the gene object - # if self.gpi is not None: - # entity = self.gpi.get(str(assoc.subject.id), None) - # if entity is not None: - # assoc.subject.label = entity["symbol"] - # assoc.subject.fullname = entity["name"] - # assoc.subject.synonyms = entity["synonyms"].split("|") - # assoc.subject.type = entity["type"] - - if not self._validate_id(str(assoc.object.id), split_line, context=ANNOTATION): - print("skipping because {} not validated!".format(assoc.object.id)) - return assocparser.ParseResult(line, [], True) - - valid_goid = self._validate_ontology_class_id(str(assoc.object.id), split_line) - if valid_goid is None: - return assocparser.ParseResult(line, [], True) - assoc.object.id = association.Curie.from_str(valid_goid) - - references = self.validate_curie_ids(assoc.evidence.has_supporting_reference, split_line) - if references is None: - # Reporting occurs in above function call - return assocparser.ParseResult(line, [], True) - - # With/From - for wf in assoc.evidence.with_support_from: - validated = self.validate_curie_ids(wf.elements, split_line) - if validated is None: - return assocparser.ParseResult(line, [], True) - with_support_from = self._unroll_withfrom_and_replair_obsoletes(split_line, 'gaf') - if with_support_from is None: - return assocparser.ParseResult(line, [], True) - assoc.evidence.with_support_from = with_support_from - - # Extension - # repair, if possible any GO terms in the extensions that may be obsolete - if (0 < len(assoc.object_extensions)): - for ext in assoc.object_extensions: - validated = self.validate_curie_ids([e.term for e in ext.elements], split_line) - if validated is None: - return assocparser.ParseResult(line, [], True) - repaired = self._repair_extensions(assoc.object_extensions, split_line) - if repaired is None: - assoc.object_extensions = [] - return assocparser.ParseResult(line, [], True) - assoc.object_extensions = repaired - + # validation self._validate_symbol(assoc.subject.label, split_line) diff --git a/tests/resources/obsolete.json b/tests/resources/obsolete.json index 58914703..09ffea5d 100644 --- a/tests/resources/obsolete.json +++ b/tests/resources/obsolete.json @@ -115,6 +115,60 @@ "type" : "CLASS", "lbl" : "ferric iron import across cell outer membrane" }, { + "id" : "http://purl.obolibrary.org/obo/GO_0006758", + "meta" : { + "basicPropertyValues" : [ { + "pred" : "http://purl.obolibrary.org/obo/IAO_0100001", + "val" : "http://purl.obolibrary.org/obo/GO_0006754" + }, { + "pred" : "http://purl.obolibrary.org/obo/IAO_0000231", + "val" : "http://purl.obolibrary.org/obo/IAO_0000227" + } ], + "deprecated" : true + }, + "type" : "CLASS" + }, { + "id" : "http://purl.obolibrary.org/obo/GO_0006754", + "meta" : { + "definition" : { + "val" : "The chemical reactions and pathways resulting in the formation of ATP, adenosine 5'-triphosphate, a universally important coenzyme and enzyme regulator.", + "xrefs" : [ "GOC:go_curators", "ISBN:0198506732" ] + }, + "synonyms" : [ { + "pred" : "hasExactSynonym", + "val" : "ATP anabolism", + "xrefs" : [ ] + }, { + "pred" : "hasNarrowSynonym", + "val" : "ATP regeneration", + "xrefs" : [ ] + }, { + "pred" : "hasExactSynonym", + "val" : "ATP synthesis", + "xrefs" : [ ] + }, { + "pred" : "hasExactSynonym", + "val" : "ATP biosynthesis", + "xrefs" : [ ] + }, { + "pred" : "hasExactSynonym", + "val" : "ATP formation", + "xrefs" : [ ] + } ], + "basicPropertyValues" : [ { + "pred" : "http://www.geneontology.org/formats/oboInOwl#hasAlternativeId", + "val" : "GO:0006759" + }, { + "pred" : "http://www.geneontology.org/formats/oboInOwl#hasAlternativeId", + "val" : "GO:0006758" + }, { + "pred" : "http://www.geneontology.org/formats/oboInOwl#hasOBONamespace", + "val" : "biological_process" + } ] + }, + "type" : "CLASS", + "lbl" : "ATP biosynthetic process" + }, { "id" : "http://purl.obolibrary.org/obo/GO_0016458", "meta" : { "definition" : { diff --git a/tests/test_local_json.py b/tests/test_local_json.py index 4595fb94..69838fc5 100644 --- a/tests/test_local_json.py +++ b/tests/test_local_json.py @@ -256,4 +256,5 @@ def test_obsolete(): assert ont.replaced_by('GO:1') == ['GO:2'] assert ont.replaced_by('GO:4') == ['GO:3'] assert ont.replaced_by('GO:0005913') == ['GO:0005912'] - assert n_obs == 4 + assert ont.replaced_by('GO:0006758') == ['GO:0006754'] + assert n_obs == 5 diff --git a/tests/test_qc.py b/tests/test_qc.py index acfce2d3..5b2a257b 100644 --- a/tests/test_qc.py +++ b/tests/test_qc.py @@ -9,6 +9,7 @@ from ontobio.io import gaference from ontobio.io import assocparser from ontobio.io import gafparser +from ontobio.io.gafparser import GafParser from ontobio.io import gpadparser from ontobio import ontol, ontol_factory, ecomap @@ -117,6 +118,17 @@ def test_go_rule_06(): test_result = qc.GoRule06().test(assoc, all_rules_config(ontology=ontology)) assert test_result.result_type == qc.ResultType.ERROR + #Ensure obsoleted GO id is repaired with alternate id first before Gorule06 + line = ["GeneDB", "PF3D7_0507500", "SUB1", "enables", "GO:0006758", "PMID:12764150", "IEP", "PANTHER:PTN000623979|TAIR:locus:2099478", "C", "GORULE:0000006-6", "protease 1", "gene", "NCBITaxon:36329", "20090624", "GeneDB", "", ""] + obs_ontology = ontol_factory.OntologyFactory().create("tests/resources/obsolete.json") + p = GafParser(config=assocparser.AssocParserConfig(ontology=obs_ontology, rule_set = assocparser.RuleSet.ALL)) + p.version = "2.2" + parsed = p.parse_line("\t".join(line)) + assoc = parsed.associations[0] + assert assoc.object.id == Curie.from_str("GO:0006754") + test_result = qc.GoRule06().test(assoc, all_rules_config(ontology=obs_ontology)) + assert test_result.result_type == qc.ResultType.PASS + def test_go_rule_07(): assoc = make_annotation(goid="GO:0003824", evidence="IPI").associations[0]