Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use OMO for synonym type predicates #88

Merged
merged 12 commits on
Nov 7, 2023
71 changes: 41 additions & 30 deletions src/ncbitaxon.py
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@

from collections import defaultdict
from datetime import date
from textwrap import dedent

oboInOwl = {
"SynonymTypeProperty": "synonym_type_property",
@@ -24,24 +25,27 @@
related_synonym = "oboInOwl:hasRelatedSynonym"
broad_synonym = "oboInOwl:hasBroadSynonym"

# See OMO properties at
# https://github.com/information-artifact-ontology/ontology-metadata/blob/master/src/templates/annotation_properties.tsv
predicates = {
"acronym": broad_synonym,
"anamorph": related_synonym,
"blast name": related_synonym,
"common name": exact_synonym,
"equivalent name": exact_synonym,
"genbank acronym": broad_synonym,
"genbank anamorph": related_synonym,
"genbank common name": exact_synonym,
"genbank synonym": related_synonym,
"in-part": related_synonym,
"misnomer": related_synonym,
"misspelling": related_synonym,
"synonym": related_synonym,
"scientific name": exact_synonym,
"teleomorph": related_synonym,
"acronym": (broad_synonym, "OMO:0003000"), # abbreviation
"anamorph": (related_synonym, None),
"blast name": (related_synonym, None),
"common name": (exact_synonym, "OMO:0003003"), # layperson synonym
"equivalent name": (exact_synonym, None),
"genbank acronym": (broad_synonym, None),
"genbank anamorph": (related_synonym, None),
"genbank common name": (exact_synonym, None),
"genbank synonym": (related_synonym, None),
"in-part": (related_synonym, None),
"misnomer": (related_synonym, "OMO:0003007"), # misnomer
"misspelling": (related_synonym, "OMO:0003006"), # misspelling
"synonym": (related_synonym, None),
"scientific name": (exact_synonym, None),
"teleomorph": (related_synonym, None),
}


ranks = [
"class",
"cohort",
@@ -110,16 +114,17 @@ def convert_synonyms(tax_id, synonyms):
for synonym, unique, name_class in synonyms:
if name_class in predicates:
synonym = escape_literal(synonym)
predicate = predicates[name_class]
synonym_type = label_to_id(name_class)
predicate, synonym_type_curie = predicates[name_class]
if synonym_type_curie is None:
synonym_type_curie = "ncbitaxon:" + label_to_id(name_class)
output.append(
f"""
NCBITaxon:{tax_id} {predicate} "{synonym}"^^xsd:string .
[ a owl:Axiom
; owl:annotatedSource NCBITaxon:{tax_id}
; owl:annotatedProperty {predicate}
; owl:annotatedTarget "{synonym}"^^xsd:string
; oboInOwl:hasSynonymType ncbitaxon:{synonym_type}
; oboInOwl:hasSynonymType {synonym_type_curie}
] ."""
)
return output
@@ -195,6 +200,7 @@ def convert(taxdmp_path, output_path, taxa=None):
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix obo: <http://purl.obolibrary.org/obo/> .
@prefix oboInOwl: <http://www.geneontology.org/formats/oboInOwl#> .
@prefix OMO: <http://purl.obolibrary.org/obo/OMO_> .
@prefix terms: <http://purl.org/dc/terms/> .
@prefix ncbitaxon: <http://purl.obolibrary.org/obo/ncbitaxon#> .
@prefix NCBITaxon: <http://purl.obolibrary.org/obo/NCBITaxon_> .
@@ -229,18 +235,23 @@ def convert(taxdmp_path, output_path, taxa=None):
.
"""
)
for label, parent in predicates.items():
predicate = label_to_id(label)
parent = parent.replace("oboInOwl", "oio")
output.write(
f"""
ncbitaxon:{predicate} a owl:AnnotationProperty
; rdfs:label "{label}"^^xsd:string
; oboInOwl:hasScope "{parent}"^^xsd:string
; rdfs:subPropertyOf oboInOwl:SynonymTypeProperty
.
"""
)

for label, (parent, omo_curie) in predicates.items():
if omo_curie is not None:
output.write(dedent(f"""
{omo_curie} a owl:AnnotationProperty ;
rdfs:label "{label}"^^xsd:string ;
rdfs:subPropertyOf oboInOwl:SynonymTypeProperty .
"""))
else:
predicate = label_to_id(label)
parent = parent.replace("oboInOwl", "oio")
output.write(dedent(f"""
ncbitaxon:{predicate} a owl:AnnotationProperty ;
rdfs:label "{label}"^^xsd:string ;
oboInOwl:hasScope "{parent}"^^xsd:string ;
rdfs:subPropertyOf oboInOwl:SynonymTypeProperty .
"""))

with zipfile.ZipFile(taxdmp_path) as taxdmp:
with taxdmp.open("names.dmp") as dmp: