Skip to content

Commit

Permalink
Cleanup definitions part 1
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt committed Nov 30, 2023
1 parent b73e986 commit b1158d5
Show file tree
Hide file tree
Showing 14 changed files with 54 additions and 27 deletions.
3 changes: 2 additions & 1 deletion src/pyobo/sources/cgnc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pandas as pd

from pyobo.struct import Obo, Reference, Term, from_species
from pyobo.struct.typedef import exact_match
from pyobo.utils.path import ensure_df

__all__ = [
Expand All @@ -25,7 +26,7 @@ class CGNCGetter(Obo):

ontology = PREFIX
dynamic_version = True
typedefs = [from_species]
typedefs = [from_species, exact_match]

def iter_terms(self, force: bool = False) -> Iterable[Term]:
"""Iterate over terms in the ontology."""
Expand Down
3 changes: 2 additions & 1 deletion src/pyobo/sources/chembl.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import chembl_downloader

from pyobo.struct import Obo, Reference, Term
from pyobo.struct.typedef import has_inchi, has_smiles
from pyobo.struct.typedef import exact_match, has_inchi, has_smiles

__all__ = [
"ChEMBLCompoundGetter",
Expand Down Expand Up @@ -45,6 +45,7 @@ class ChEMBLCompoundGetter(Obo):

ontology = "chembl.compound"
bioversions_key = "chembl"
typedefs = [exact_match]

def iter_terms(self, force: bool = False) -> Iterable[Term]:
"""Iterate over terms in the ontology."""
Expand Down
2 changes: 2 additions & 0 deletions src/pyobo/sources/depmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pystow

from pyobo import Obo, Reference, Term
from pyobo.struct.typedef import exact_match

__all__ = [
"get_obo",
Expand All @@ -23,6 +24,7 @@ class DepMapGetter(Obo):

ontology = bioversions_key = PREFIX
data_version = VERSION
typedefs = [exact_match]

def iter_terms(self, force: bool = False) -> Iterable[Term]:
"""Iterate over terms in the ontology."""
Expand Down
3 changes: 2 additions & 1 deletion src/pyobo/sources/drugcentral.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from tqdm.auto import tqdm

from pyobo.struct import Obo, Reference, Synonym, Term
from pyobo.struct.typedef import has_inchi, has_smiles
from pyobo.struct.typedef import exact_match, has_inchi, has_smiles

__all__ = [
"DrugCentralGetter",
Expand All @@ -34,6 +34,7 @@ class DrugCentralGetter(Obo):
"""An ontology representation of the DrugCentral database."""

ontology = bioversions_key = PREFIX
typedefs = [exact_match]

def iter_terms(self, force: bool = False) -> Iterable[Term]:
"""Iterate over terms in the ontology."""
Expand Down
2 changes: 2 additions & 0 deletions src/pyobo/sources/hgnc.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
orthologous,
transcribes_to,
)
from pyobo.struct.typedef import exact_match
from pyobo.utils.path import ensure_path, prefix_directory_join

__all__ = [
Expand Down Expand Up @@ -212,6 +213,7 @@ class HGNCGetter(Obo):
transcribes_to,
orthologous,
member_of,
exact_match,
]
idspaces = IDSPACES
synonym_typedefs = [
Expand Down
4 changes: 3 additions & 1 deletion src/pyobo/sources/mgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import pandas as pd
from tqdm.auto import tqdm

from pyobo.struct.typedef import exact_match

from ..struct import (
Obo,
Reference,
Expand Down Expand Up @@ -37,7 +39,7 @@ class MGIGetter(Obo):

ontology = PREFIX
dynamic_version = True
typedefs = [from_species, has_gene_product, transcribes_to]
typedefs = [from_species, has_gene_product, transcribes_to, exact_match]

def iter_terms(self, force: bool = False) -> Iterable[Term]:
"""Iterate over terms in the ontology."""
Expand Down
6 changes: 4 additions & 2 deletions src/pyobo/sources/mirbase_family.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ def get_obo(force: bool = False) -> Obo:
def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
"""Get miRBase family terms."""
df = get_df(version, force=force)
for family_id, name, mirna_id, mirna_name in tqdm(df.values, total=len(df.index)):
for family_id, name, mirna_id, mirna_name in tqdm(
df.values, total=len(df.index), unit_scale=True, desc="miRBase Family"
):
term = Term(
reference=Reference(prefix=PREFIX, identifier=family_id, name=name),
)
Expand All @@ -65,4 +67,4 @@ def get_df(version: str, force: bool = False) -> pd.DataFrame:


if __name__ == "__main__":
get_obo().write_default(use_tqdm=True)
get_obo().write_default(use_tqdm=True, write_obo=True, force=True)
2 changes: 1 addition & 1 deletion src/pyobo/sources/npass.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
logger.debug("multiple cids for %s: %s", identifier, pubchem_compound_ids)
for pubchem_compound_id in pubchem_compound_ids:
term.append_xref(
Reference(prefix="pubchem.compound", identifier=pubchem_compound_id)
Reference(prefix="pubchem.compound", identifier=pubchem_compound_id.strip())
)

for synonym in [iupac]:
Expand Down
11 changes: 5 additions & 6 deletions src/pyobo/sources/ror.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
import zenodo_client
from tqdm.auto import tqdm

from pyobo.struct import Obo, Reference, SynonymTypeDef, Term, TypeDef
from pyobo.struct import Obo, Reference, Term, TypeDef
from pyobo.struct.struct import acronym

PREFIX = "ror"
ROR_ZENODO_RECORD_ID = "10086202"
Expand All @@ -21,8 +22,6 @@
SUCCESSOR = Reference(prefix="BFO", identifier="0000063")
PREDECESSOR = Reference(prefix="BFO", identifier="0000062")

ACRONYM = SynonymTypeDef(reference=Reference(prefix="omo", identifier="0003000", name="acronym"))

RMAP = {
"Related": TypeDef.from_triple("rdfs", "seeAlso"),
"Child": TypeDef(HAS_PART),
Expand All @@ -45,7 +44,7 @@ class RORGetter(Obo):

ontology = bioregistry_key = PREFIX
typedefs = list(RMAP.values())
synonym_typedefs = [ACRONYM]
synonym_typedefs = [acronym]
idspaces = {
"ror": "https://ror.org/",
"geonames": "https://www.geonames.org/",
Expand Down Expand Up @@ -110,8 +109,8 @@ def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
if synonym.startswith("The "):
term.append_synonym(synonym.removeprefix("The "))

for acronym in record.get("acronyms", []):
term.append_synonym(acronym, type=ACRONYM)
for acronym_synonym in record.get("acronyms", []):
term.append_synonym(acronym_synonym, type=acronym)

for prefix, xref_data in record.get("external_ids", {}).items():
if prefix == "OrgRef":
Expand Down
7 changes: 2 additions & 5 deletions src/pyobo/sources/sgd.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import Iterable
from urllib.parse import unquote_plus

from ..struct import Obo, Reference, Synonym, SynonymTypeDef, Term, from_species
from ..struct import Obo, Reference, Synonym, Term, from_species
from ..utils.path import ensure_tar_df

__all__ = [
Expand All @@ -21,15 +21,12 @@
)
INNER_PATH = "S288C_reference_genome_R64-2-1_20150113/saccharomyces_cerevisiae_R64-2-1_20150113.gff"

alias_type = SynonymTypeDef.from_text("alias")


class SGDGetter(Obo):
"""An ontology representation of SGD's yeast gene nomenclature."""

bioversions_key = ontology = PREFIX
typedefs = [from_species]
synonym_typedefs = [alias_type]

def iter_terms(self, force: bool = False) -> Iterable[Term]:
"""Iterate over terms for SGD."""
Expand Down Expand Up @@ -68,7 +65,7 @@ def get_terms(ontology: Obo, force: bool = False) -> Iterable[Term]:
aliases = d.get("Alias")
if aliases:
for alias in aliases.split(","):
synonyms.append(Synonym(name=unquote_plus(alias), type=alias_type))
synonyms.append(Synonym(name=unquote_plus(alias)))

term = Term(
reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
Expand Down
12 changes: 6 additions & 6 deletions src/pyobo/sources/slm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
import pandas as pd
from tqdm.auto import tqdm

from pyobo import Obo, Reference, SynonymTypeDef, Term
from pyobo.struct.typedef import has_inchi, has_smiles
from pyobo import Obo, Reference, Term
from pyobo.struct.struct import abbreviation as abbreviation_typedef
from pyobo.struct.typedef import exact_match, has_inchi, has_smiles
from pyobo.utils.path import ensure_df

__all__ = [
Expand Down Expand Up @@ -38,14 +39,13 @@
"PMID",
]

abreviation_type = SynonymTypeDef.from_text("abbreviation")


class SLMGetter(Obo):
"""An ontology representation of SwissLipid's lipid nomenclature."""

ontology = bioversions_key = PREFIX
synonym_typedefs = [abreviation_type]
typedefs = [exact_match]
synonym_typedefs = [abbreviation_typedef]

def iter_terms(self, force: bool = False) -> Iterable[Term]:
"""Iterate over terms in the ontology."""
Expand Down Expand Up @@ -94,7 +94,7 @@ def iter_terms(version: str, force: bool = False):
if pd.notna(level):
term.append_property("level", level)
if pd.notna(abbreviation):
term.append_synonym(abbreviation, type=abreviation_type)
term.append_synonym(abbreviation, type=abbreviation_typedef)
if pd.notna(synonyms):
for synonym in synonyms.split("|"):
term.append_synonym(synonym.strip())
Expand Down
3 changes: 2 additions & 1 deletion src/pyobo/sources/zfin.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
has_gene_product,
orthologous,
)
from pyobo.struct.typedef import exact_match
from pyobo.utils.io import multidict, multisetdict
from pyobo.utils.path import ensure_df

Expand All @@ -40,7 +41,7 @@ class ZFINGetter(Obo):
"""An ontology representation of ZFIN's zebrafish database."""

bioversions_key = ontology = PREFIX
typedefs = [from_species, has_gene_product, orthologous]
typedefs = [from_species, has_gene_product, orthologous, exact_match]

def iter_terms(self, force: bool = False) -> Iterable[Term]:
"""Iterate over terms in ZFIN."""
Expand Down
19 changes: 17 additions & 2 deletions src/pyobo/struct/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,11 @@ def from_text(
# Synonym type pointing at oboInOwl:SynonymType.
# NOTE(review): presumably the fallback used when a synonym has no explicit type — confirm against callers.
DEFAULT_SYNONYM_TYPE = SynonymTypeDef(
reference=Reference(prefix="oboInOwl", identifier="SynonymType", name="Synonym"),
)
# Shared synonym type for abbreviations (identifier 0003000, labelled "abbreviation").
abbreviation = SynonymTypeDef(
reference=Reference(prefix="OMO", identifier="0003000", name="abbreviation")
)
# Shared synonym type for acronyms (identifier 0003012, labelled "acronym").
# NOTE(review): the prefix is spelled "omo" here but "OMO" for ``abbreviation`` just above;
# confirm that Reference normalizes prefix casing, otherwise unify the spelling.
acronym = SynonymTypeDef(reference=Reference(prefix="omo", identifier="0003012", name="acronym"))


ReferenceHint = Union[Reference, "Term", Tuple[str, str], str]

Expand Down Expand Up @@ -425,8 +430,13 @@ def iterate_obo_lines(self, *, ontology, typedefs) -> Iterable[str]:
yield f"{parent_tag}: {parent}"

for typedef, references in sorted(self.relationships.items(), key=_sort_relations):
if typedef not in typedefs:
logger.warning(f"[{ontology}] typedef not defined in OBO: {typedef}")
if (not typedefs or typedef not in typedefs) and (
ontology,
typedef.curie,
) not in _TYPEDEF_WARNINGS:
logger.warning(f"[{ontology}] typedef not defined in OBO: {typedef.curie}")
_TYPEDEF_WARNINGS.add((ontology, typedef.curie))

typedef_preferred_curie = typedef.preferred_curie
for reference in sorted(references, key=attrgetter("prefix", "identifier")):
s = f"relationship: {typedef_preferred_curie} {reference.preferred_curie}"
Expand All @@ -450,6 +460,10 @@ def _escape(s) -> str:
return s.replace("\n", "\\n").replace('"', '\\"')


#: Pairs of ``(ontology, typedef CURIE)`` for which the "typedef not defined in OBO"
#: warning has already been logged. A pair is checked before warning and added right
#: after, so the same missing typedef is reported once per ontology instead of once
#: per term. The set is module-level and is not cleared anywhere in the code shown.
_TYPEDEF_WARNINGS: Set[Tuple[str, str]] = set()


def _sort_relations(r):
typedef, _references = r
return typedef.reference.name or typedef.reference.identifier
Expand Down Expand Up @@ -665,6 +679,7 @@ def iterate_obo_lines(self) -> Iterable[str]:
yield f'property_value: http://purl.org/dc/terms/license "{license_spdx_id}" xsd:string'
description = bioregistry.get_description(self.ontology)
if description:
description = obo_escape_slim(description.strip())
yield f'property_value: http://purl.org/dc/elements/1.1/description "{description}" xsd:string'

for root_term in self.root_terms or []:
Expand Down
4 changes: 4 additions & 0 deletions src/pyobo/struct/typedef.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
"enables",
"participates_in",
"has_participant",
"exact_match",
"has_dbxref",
# Properties
"has_inchi",
"has_smiles",
]
Expand Down Expand Up @@ -266,6 +269,7 @@ def get_reference_tuple(relation: RelationHint) -> Tuple[str, str]:
# Typedef labelled "has ontology root term" (identifier 0000700 under IAO_PREFIX).
has_ontology_root_term = TypeDef.from_triple(
prefix=IAO_PREFIX, identifier="0000700", name="has ontology root term"
)
# Typedef built from the CURIE oboInOwl:hasDbXref, labelled "has database cross-reference".
has_dbxref = TypeDef.from_curie("oboInOwl:hasDbXref", name="has database cross-reference")

# A plain Reference (not a TypeDef) labelled "editor note" (identifier 0000116 under IAO_PREFIX).
editor_note = Reference(prefix=IAO_PREFIX, identifier="0000116", name="editor note")

Expand Down

0 comments on commit b1158d5

Please sign in to comment.