From 245824291b3f841b1e8e771d4ea54a598fc7caed Mon Sep 17 00:00:00 2001 From: Chris Mungall Date: Thu, 1 Aug 2024 09:42:47 -0700 Subject: [PATCH] set_label will INSERT if label not already presented. Fixes #788 (#789) --- .../aggregator/aggregator_implementation.py | 5 +++++ .../sqldb/sql_implementation.py | 18 +++++++++++++----- .../utilities/lexical/lexical_indexer.py | 4 ++-- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/oaklib/implementations/aggregator/aggregator_implementation.py b/src/oaklib/implementations/aggregator/aggregator_implementation.py index 2e991512e..ced4293b1 100644 --- a/src/oaklib/implementations/aggregator/aggregator_implementation.py +++ b/src/oaklib/implementations/aggregator/aggregator_implementation.py @@ -1,3 +1,4 @@ +import logging from collections import defaultdict from dataclasses import dataclass from io import TextIOWrapper @@ -140,6 +141,10 @@ def sssom_mappings(self, *args, **kwargs) -> Iterable[Mapping]: def label(self, curie: CURIE, **kwargs) -> str: return self._delegate_first(lambda i: i.label(curie, **kwargs)) + def set_label(self, curie: CURIE, label: str) -> None: + logging.debug(f"Assuming {curie} is in first aggregated resource, label={label}") + return self._delegate_first(lambda i: i.set_label(curie, label)) + def curies_by_label(self, label: str) -> List[CURIE]: return list(self._delegate_iterator(lambda i: i.curies_by_label(label))) diff --git a/src/oaklib/implementations/sqldb/sql_implementation.py b/src/oaklib/implementations/sqldb/sql_implementation.py index 3b839c5c0..8b2283cc8 100644 --- a/src/oaklib/implementations/sqldb/sql_implementation.py +++ b/src/oaklib/implementations/sqldb/sql_implementation.py @@ -788,11 +788,19 @@ def _execute(self, stmt): self.save() def set_label(self, curie: CURIE, label: str) -> bool: - stmt = ( - update(Statements) - .where(and_(Statements.subject == curie, Statements.predicate == LABEL_PREDICATE)) - .values(value=label) - ) + existing_label = self.label(curie) + if existing_label: + stmt = ( + update(Statements) + .where(and_(Statements.subject == curie, Statements.predicate == LABEL_PREDICATE)) + .values(value=label) + ) + else: + stmt = ( + insert(Statements) + .values(subject=curie, predicate=LABEL_PREDICATE, value=label) + .execution_options(autocommit=True) + ) self._execute(stmt) def basic_search(self, search_term: str, config: SearchConfiguration = None) -> Iterable[CURIE]: diff --git a/src/oaklib/utilities/lexical/lexical_indexer.py b/src/oaklib/utilities/lexical/lexical_indexer.py index a039e3664..888648a00 100644 --- a/src/oaklib/utilities/lexical/lexical_indexer.py +++ b/src/oaklib/utilities/lexical/lexical_indexer.py @@ -79,9 +79,8 @@ def add_labels_from_uris(oi: BasicOntologyInterface): if curie.startswith("", "") label = " ".join(label.split("_")) - # print(f'{curie} ==> {label} // {type(oi)}') oi.set_label(curie, label) - # print(oi.get_label_by_curie(curie)) + def create_lexical_index( @@ -127,6 +126,7 @@ def _invert_mapping_pred(mapping_pred: PRED_CURIE) -> PRED_CURIE: logging.info("Creating mapping index") mapping_pairs_by_curie = defaultdict(list) for curie in oi.entities(): + logging.debug(f"Finding mappings for {curie}") pairs = list(oi.simple_mappings_by_curie(curie)) for pred, object_id in pairs: mapping_pairs_by_curie[curie].append((pred, object_id))