Skip to content

Commit

Permalink
Typedef and xref cleanup
Browse files: browse the repository at this point in the history
  • Loading branch information
cthoyt committed Oct 30, 2023
1 parent fb3444f commit f34041d
Show file tree
Hide file tree
Showing 10 changed files with 44 additions and 26 deletions.
4 changes: 2 additions & 2 deletions src/pyobo/sources/cgnc.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ def get_terms(force: bool = False) -> Iterable[Term]:
)
term.set_species(identifier="9031", name="Gallus gallus")
if entrez_id and pd.notna(entrez_id):
term.append_xref(Reference(prefix="ncbigene", identifier=entrez_id))
term.append_exact_match(Reference(prefix="ncbigene", identifier=entrez_id))
if pd.notna(ensembl_id):
term.append_xref(Reference(prefix="ensembl", identifier=ensembl_id))
term.append_exact_match(Reference(prefix="ensembl", identifier=ensembl_id))
if synonym_1 and pd.notna(synonym_1):
term.append_synonym(synonym_1)
if synoynm_2 and pd.notna(synoynm_2):
Expand Down
7 changes: 4 additions & 3 deletions src/pyobo/sources/chembl.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import chembl_downloader

from pyobo.struct import Obo, Reference, Term
from pyobo.struct.typedef import has_inchi, has_smiles

__all__ = [
"ChEMBLCompoundGetter",
Expand Down Expand Up @@ -66,11 +67,11 @@ def iter_terms(version: str) -> Iterable[Term]:
# TODO add xrefs?
term = Term.from_triple(prefix=PREFIX, identifier=chembl_id, name=name)
if smiles:
term.append_property("smiles", smiles)
term.append_property(has_smiles, smiles)
if inchi:
term.append_property("inchi", inchi)
term.append_property(has_inchi, inchi)
if inchi_key:
term.append_xref(Reference(prefix="inchikey", identifier=inchi_key))
term.append_exact_match(Reference(prefix="inchikey", identifier=inchi_key))
yield term


Expand Down
8 changes: 4 additions & 4 deletions src/pyobo/sources/drugbank.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from ..getters import NoBuild
from ..struct import Obo, Reference, Term
from ..struct.typedef import has_salt
from ..struct.typedef import has_inchi, has_salt, has_smiles
from ..utils.cache import cached_pickle
from ..utils.path import prefix_directory_join

Expand Down Expand Up @@ -121,10 +121,10 @@ def _make_term(drug_info: Mapping[str, Any]) -> Term:
if identifier:
term.append_xref(Reference(prefix=xref_prefix, identifier=identifier))

for prop in ["smiles", "inchi"]:
identifier = drug_info.get(xref_prefix)
for prop, debio_curie in [("smiles", has_smiles), ("inchi", has_inchi)]:
identifier = drug_info.get(prop)
if identifier:
term.append_property(prop, identifier)
term.append_property(debio_curie, identifier)

for salt in drug_info.get("salts", []):
term.append_relationship(
Expand Down
13 changes: 8 additions & 5 deletions src/pyobo/sources/drugcentral.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from tqdm.auto import tqdm

from pyobo.struct import Obo, Reference, Synonym, Term
from pyobo.struct.typedef import has_inchi, has_smiles

__all__ = [
"DrugCentralGetter",
Expand Down Expand Up @@ -87,13 +88,15 @@ def iter_terms() -> Iterable[Term]:
xrefs=xrefs.get(drugcentral_id, []),
)
if inchi_key:
term.append_xref(Reference(prefix="inchikey", identifier=inchi_key))
term.append_property("smiles", smiles)
term.append_property("inchi", inchi)
term.append_exact_match(Reference(prefix="inchikey", identifier=inchi_key))
if smiles:
term.append_property(has_smiles, smiles)
if inchi:
term.append_property(has_inchi, inchi)
if cas:
term.append_xref(Reference(prefix="cas", identifier=cas))
term.append_exact_match(Reference(prefix="cas", identifier=cas))
yield term


if __name__ == "__main__":
DrugCentralGetter.cli()
get_obo().write_default(write_obo=True)
2 changes: 1 addition & 1 deletion src/pyobo/sources/hgncgenefamily.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class HGNCGroupGetter(Obo):
"""An ontology representation of HGNC's gene group nomenclature."""

ontology = PREFIX
dynamic_version = True
bioversions_key = "hgnc"
synonym_typedefs = [symbol_type]
typedefs = [from_species, enables]

Expand Down
9 changes: 5 additions & 4 deletions src/pyobo/sources/mirbase_mature.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,16 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
for _, name, previous_name, mirbase_mature_id in tqdm(
df.values, total=len(df.index), unit_scale=True
):
synonyms = []
if pd.notna(previous_name):
synonyms.append(Synonym(name=previous_name))
yield Term(
reference=Reference(
prefix=PREFIX, identifier=mirbase_mature_id, name=name if pd.notna(name) else None
),
synonyms=[
Synonym(name=previous_name),
],
synonyms=synonyms,
)


if __name__ == "__main__":
MiRBaseMatureGetter.cli()
get_obo().write_default(write_obo=True, write_obograph=True, use_tqdm=True)
12 changes: 7 additions & 5 deletions src/pyobo/sources/slm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
import pandas as pd
from tqdm.auto import tqdm

from pyobo import Obo, SynonymTypeDef, Term
from pyobo import Obo, Reference, SynonymTypeDef, Term
from pyobo.struct.typedef import has_inchi, has_smiles
from pyobo.utils.path import ensure_df

__all__ = [
Expand Down Expand Up @@ -90,22 +91,23 @@ def iter_terms(version: str, force: bool = False):
else:
raise ValueError(identifier)
term = Term.from_triple(PREFIX, identifier, name)
term.append_property("level", level)
if pd.notna(level):
term.append_property("level", level)
if pd.notna(abbreviation):
term.append_synonym(abbreviation, type=abreviation_type)
if pd.notna(synonyms):
for synonym in synonyms.split("|"):
term.append_synonym(synonym.strip())
if pd.notna(smiles):
term.append_property("smiles", smiles)
term.append_property(has_smiles, smiles)
if pd.notna(inchi) and inchi != "InChI=none":
if inchi.startswith("InChI="):
inchi = inchi[len("InChI=") :]
term.append_property("inchi", inchi)
term.append_property(has_inchi, inchi)
if pd.notna(inchikey):
if inchikey.startswith("InChIKey="):
inchikey = inchikey[len("InChIKey=") :]
term.append_property("inchikey", inchikey)
term.append_xref(Reference(prefix="inchikey", identifier=inchikey))
if pd.notna(chebi_id):
term.append_xref(("chebi", chebi_id))
if pd.notna(lipidmaps_id):
Expand Down
2 changes: 1 addition & 1 deletion src/pyobo/struct/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def validate_identifier(cls, values): # noqa
if norm_prefix is None:
raise ExpansionError(f"Unknown prefix: {prefix}")
values["prefix"] = norm_prefix
values["identifier"] = bioregistry.standardize_identifier(norm_prefix, identifier)
values["identifier"] = bioregistry.standardize_identifier(norm_prefix, identifier).strip()
# if not bioregistry.is_valid_identifier(norm_prefix, values["identifier"]):
# raise ValueError(f"non-standard identifier: {norm_prefix}:{norm_identifier}")
return values
Expand Down
2 changes: 1 addition & 1 deletion src/pyobo/struct/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def _ensure_ref(reference: ReferenceHint) -> Reference:
return Reference(prefix=reference[0], identifier=reference[1])
if isinstance(reference, Reference):
return reference
raise TypeError
raise TypeError(f"invalid type given for a reference ({type(reference)}): {reference}")


@dataclass
Expand Down
11 changes: 11 additions & 0 deletions src/pyobo/struct/typedef.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
"enables",
"participates_in",
"has_participant",
"has_inchi",
"has_smiles",
]


Expand Down Expand Up @@ -292,6 +294,15 @@ def get_reference_tuple(relation: RelationHint) -> Tuple[str, str]:
reference=Reference(prefix="ro", identifier="0018038", name="has functional parent"),
)

# Typedef for the "has SMILES" relation, identified by the CURIE debio:0000022.
# Passed as the property key when attaching a SMILES string to a term.
has_smiles = TypeDef(
reference=Reference(prefix="debio", identifier="0000022", name="has SMILES"),
)

# Typedef for the "has InChI" relation, identified by the CURIE debio:0000020.
# Passed as the property key when attaching an InChI string to a term.
has_inchi = TypeDef(
reference=Reference(prefix="debio", identifier="0000020", name="has InChI"),
)


# Index every TypeDef bound in this namespace so far by its ``pair`` attribute
# (a 2-tuple of strings, per the annotation). This must stay below all TypeDef
# definitions, since it only sees names that already exist when it runs.
#
# The namespace values are snapshotted with ``list(...)`` before iterating:
# a comprehension that walks ``locals()`` directly can raise "RuntimeError:
# dictionary changed size during iteration" on Python 3.12+ when run under
# tracing (e.g. coverage measurement). Only the values are needed, so the
# unused name key is no longer unpacked.
default_typedefs: Dict[Tuple[str, str], TypeDef] = {
    typedef.pair: typedef
    for typedef in list(locals().values())
    if isinstance(typedef, TypeDef)
}
Expand Down

0 comments on commit f34041d

Please sign in to comment.