Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
korikuzma committed Dec 31, 2024
1 parent 4d51227 commit 0dba1c1
Show file tree
Hide file tree
Showing 5 changed files with 414 additions and 333 deletions.
2 changes: 1 addition & 1 deletion src/therapy/etl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def _normalize_disease(self, query: str) -> str | None:
if term in self._disease_cache:
return self._disease_cache[term]
response = self.disease_normalizer.normalize(term)
normalized_id = response.normalized_id
normalized_id = response.disease.primaryCode.root if response.disease else None
self._disease_cache[term] = normalized_id
if normalized_id is None:
_logger.warning("Failed to normalize disease term: %s", query)
Expand Down
95 changes: 58 additions & 37 deletions src/therapy/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,21 @@
from typing import Any, TypeVar

from botocore.exceptions import ClientError
from ga4gh.core.models import MappableConcept, ConceptMapping, Coding, code, Extension, Relation
from ga4gh.core.models import (
Coding,
ConceptMapping,
Extension,
MappableConcept,
Relation,
code,
)
from uvicorn.config import logger

from therapy import NAMESPACE_LUIS, PREFIX_LOOKUP, SOURCES
from therapy.database import AbstractDatabase
from therapy.schemas import (
NAMESPACE_TO_SYSTEM_URI,
SYSTEM_URI_TO_NAMESPACE,
BaseNormalizationService,
HasIndication,
MatchesNormalized,
Expand Down Expand Up @@ -351,19 +360,16 @@ def _add_merged_meta(self, response: NormalizationService) -> NormalizationServi
"""
sources_meta = {}
therapy = response.therapy
sources = [response.normalized_id.split(":")[0]] # type: ignore[union-attr]
if therapy.mappings: # type: ignore[union-attr]
sources += [m.coding.system for m in therapy.mappings] # type: ignore[union-attr]

sources = []
for m in therapy.mappings or []:
ns = SYSTEM_URI_TO_NAMESPACE.get(m.coding.system)
if ns in PREFIX_LOOKUP:
sources.append(PREFIX_LOOKUP[ns])

for src in sources:
try:
src_name = SourceName(PREFIX_LOOKUP[src])
except KeyError:
# not an imported source
continue
else:
if src_name not in sources_meta:
sources_meta[src_name] = self.db.get_source_metadata(src_name)
if src not in sources_meta:
sources_meta[src] = self.db.get_source_metadata(src)
response.source_meta_ = sources_meta # type: ignore[assignment]
return response

Expand All @@ -390,24 +396,49 @@ def _add_therapy(
:param MatchType match_type: type of match achieved
:return: completed response object ready to return to user
"""

def _create_concept_mapping(
    concept_id: str, relation: Relation = Relation.RELATED_MATCH
) -> ConceptMapping:
    """Create concept mapping for identifier

    ``system`` will use OBO Foundry persistent URL (PURL), source homepage, or
    namespace prefix, in that order of preference, if available.

    :param concept_id: Concept identifier represented as a curie
    :param relation: SKOS mapping relationship, default is relatedMatch
    :return: Concept mapping for identifier
    :raise ValueError: If ``concept_id`` has no ``:`` separator, or if its prefix
        is not a supported namespace (tried as given, then upper-cased)
    """
    # Split on the first colon only, so a local ID that itself contains ":"
    # is kept intact instead of failing the two-name unpack.
    prefix, local_id = concept_id.split(":", 1)

    try:
        namespace = NamespacePrefix(prefix)
    except ValueError:
        # Some namespace values are upper-case; retry before giving up.
        try:
            namespace = NamespacePrefix(prefix.upper())
        except ValueError as e:
            err_msg = f"Namespace prefix not supported: {prefix}"
            raise ValueError(err_msg) from e

    # Fall back to the prefix *string* (not the enum member) when no URI is
    # registered, so ``system`` is always a plain string.
    system = NAMESPACE_TO_SYSTEM_URI.get(namespace, namespace.value)

    return ConceptMapping(
        coding=Coding(code=code(local_id), system=system), relation=relation
    )

therapy_obj = MappableConcept(
id=f"normalize.therapy.{record['concept_id']}",
primaryCode=code(root=record["concept_id"]),
conceptType="Therapy",
label=record.get("label")
label=record.get("label"),
)

# mappings
mappings = [
_create_concept_mapping(record["concept_id"], relation=Relation.EXACT_MATCH)
]
source_ids = record.get("xrefs", []) + record.get("associated_with", [])
mappings = []
for source_id in source_ids:
system, source_code = source_id.split(":")
mappings.append(
ConceptMapping(
coding=Coding(
code=code(source_code), system=system.lower()
),
relation=Relation.RELATED_MATCH,
)
)
mappings.extend(_create_concept_mapping(source_id) for source_id in source_ids)
if mappings:
therapy_obj.mappings = mappings

Expand Down Expand Up @@ -437,14 +468,8 @@ def _add_therapy(
indication = self._get_indication(ind_db)

if indication.normalized_disease_id:
system, source_code = indication.normalized_disease_id.split(":")
mappings = [
ConceptMapping(
coding=Coding(
code=code(source_code), system=system.lower()
),
relation=Relation.RELATED_MATCH,
)
_create_concept_mapping(indication.normalized_disease_id)
]
else:
mappings = []
Expand All @@ -464,16 +489,12 @@ def _add_therapy(
if inds_list:
approv_value["has_indication"] = inds_list

approv = Extension(
name="regulatory_approval", value=approv_value
)
approv = Extension(name="regulatory_approval", value=approv_value)
extensions.append(approv)

trade_names = record.get("trade_names")
if trade_names:
extensions.append(
Extension(name="trade_names", value=trade_names)
)
extensions.append(Extension(name="trade_names", value=trade_names))

if extensions:
therapy_obj.extensions = extensions
Expand Down
60 changes: 55 additions & 5 deletions src/therapy/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,44 @@ class NamespacePrefix(Enum):
WIKIDATA = "wikidata"


# Source to URI. Will use OBO Foundry persistent URL (PURL) or source homepage
# Keys are NamespacePrefix members; values are the URI string used to identify
# that namespace. The mapping is not one-to-one: some keys below deliberately
# point at the same URI, so a reverse (URI -> namespace) lookup cannot
# distinguish them.
NAMESPACE_TO_SYSTEM_URI: dict[NamespacePrefix, str] = {
NamespacePrefix.ATC: "https://www.who.int/tools/atc-ddd-toolkit/atc-classification/",
NamespacePrefix.CHEBI: "http://purl.obolibrary.org/obo/chebi.owl",
NamespacePrefix.CHEMBL: "https://www.ebi.ac.uk/chembl/",
# CHEMIDPLUS and CASREGISTRY share the same URI
NamespacePrefix.CHEMIDPLUS: "https://pubchem.ncbi.nlm.nih.gov/source/ChemIDplus",
NamespacePrefix.CASREGISTRY: "https://pubchem.ncbi.nlm.nih.gov/source/ChemIDplus",
NamespacePrefix.CVX: "https://www2a.cdc.gov/vaccines/iis/iisstandards/vaccines.asp?rpt=cvx",
NamespacePrefix.DRUGBANK: "https://go.drugbank.com",
NamespacePrefix.DRUGCENTRAL: "https://drugcentral.org",
NamespacePrefix.DRUGSATFDA_ANDA: "https://www.fda.gov/drugs/types-applications/abbreviated-new-drug-application-anda",
NamespacePrefix.DRUGSATFDA_NDA: "https://www.fda.gov/drugs/types-applications/new-drug-application-nda",
NamespacePrefix.HEMONC: "https://hemonc.org",
NamespacePrefix.INCHIKEY: "https://www.chemspider.com",
# IUPHAR_LIGAND and GUIDETOPHARMACOLOGY share the same URI
NamespacePrefix.IUPHAR_LIGAND: "https://www.guidetopharmacology.org/GRAC/LigandListForward",
NamespacePrefix.GUIDETOPHARMACOLOGY: "https://www.guidetopharmacology.org/GRAC/LigandListForward",
NamespacePrefix.MMSL: "https://www.nlm.nih.gov/research/umls/rxnorm/sourcereleasedocs/mmsl.html",
NamespacePrefix.MSH: "https://id.nlm.nih.gov/mesh/",
NamespacePrefix.NCIT: "http://purl.obolibrary.org/obo/ncit.owl",
NamespacePrefix.NDC: "https://dps.fda.gov/ndc",
NamespacePrefix.PUBCHEMCOMPOUND: "https://pubchem.ncbi.nlm.nih.gov/docs/compounds",
NamespacePrefix.PUBCHEMSUBSTANCE: "https://pubchem.ncbi.nlm.nih.gov/docs/substances",
NamespacePrefix.RXNORM: "https://www.nlm.nih.gov/research/umls/rxnorm/index.html",
NamespacePrefix.SPL: "https://www.fda.gov/industry/fda-data-standards-advisory-board/structured-product-labeling-resources",
NamespacePrefix.UMLS: "https://www.nlm.nih.gov/research/umls/index.html",
NamespacePrefix.UNII: "https://precision.fda.gov/uniisearch",
NamespacePrefix.UNIPROT: "https://www.uniprot.org",
NamespacePrefix.USP: "https://www.usp.org/health-quality-safety/compendial-nomenclature",
NamespacePrefix.VANDF: "https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/VANDF",
NamespacePrefix.WIKIDATA: "https://www.wikidata.org",
}

# URI to source: inverse of NAMESPACE_TO_SYSTEM_URI, mapping each system URI to
# the namespace's ``.value``.
# NOTE(review): when several namespaces share one URI, only the namespace
# declared last in NAMESPACE_TO_SYSTEM_URI is kept here — confirm that is the
# namespace callers expect to get back.
SYSTEM_URI_TO_NAMESPACE = dict(
    zip(
        NAMESPACE_TO_SYSTEM_URI.values(),
        (prefix.value for prefix in NAMESPACE_TO_SYSTEM_URI),
    )
)


class DataLicenseAttributes(BaseModel):
"""Define constraints for data license attributes."""

Expand Down Expand Up @@ -484,7 +522,6 @@ class UnmergedNormalizationService(BaseNormalizationService):
class NormalizationService(BaseNormalizationService):
"""Response containing one or more merged records and source data."""

normalized_id: str | None = None
therapy: MappableConcept | None = None
source_meta_: dict[SourceName, SourceMeta] | None = None

Expand All @@ -494,18 +531,31 @@ class NormalizationService(BaseNormalizationService):
"query": "cisplatin",
"warnings": None,
"match_type": 80,
"normalized_id": "rxcui:2555",
"therapy": {
"type": "Therapy",
"conceptType": "Therapy",
"primaryCode": "rxcui:2555",
"id": "normalize.therapy.rxcui:2555",
"label": "cisplatin",
"mappings": [
{
"coding": {"code": "C376", "system": "ncit"},
"coding": {
"code": "2555",
"system": "https://www.nlm.nih.gov/research/umls/rxnorm/index.html",
},
"relation": "exactMatch",
},
{
"coding": {
"code": "C376",
"system": "http://purl.obolibrary.org/obo/ncit.owl",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "15663-27-1", "system": "chemidplus"},
"coding": {
"code": "15663-27-1",
"system": "https://pubchem.ncbi.nlm.nih.gov/source/ChemIDplus",
},
"relation": "relatedMatch",
},
{
Expand Down
Loading

0 comments on commit 0dba1c1

Please sign in to comment.