Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Latest version of Semsimian dependency and search functionality #645

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,252 changes: 1,680 additions & 1,572 deletions poetry.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,10 @@ pysolr = "^3.9.0"
eutils = ">=0.6.0"
requests-cache = "^1.0.1"
click = "*"
semsimian = "0.2.1"
semsimian = ">=0.2.9"
urllib3 = {version = "< 2", optional = true}
pydantic = "*"
scipy = "*"

[tool.poetry.dev-dependencies]
pytest = "^7.1.3"
Expand Down
8 changes: 8 additions & 0 deletions src/oaklib/datamodels/vocabulary.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,11 @@ class SEMAPV(Enum):
MappingReview = "semapv:MappingReview"
ManualMappingCuration = "semapv:ManualMappingCuration"
RegularExpressionReplacement = "semapv:RegularExpressionReplacement"


class SearchType(Enum):
    """Enumerate the search types understood by semsimian's association search.

    Each member's ``value`` is the plain lowercase string form of its name.
    """

    # Members are listed in their original declaration order.
    FLAT = "flat"
    HYBRID = "hybrid"
    FULL = "full"
62 changes: 59 additions & 3 deletions src/oaklib/implementations/semsimian/semsimian_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import math
from dataclasses import dataclass, field
from typing import ClassVar, Dict, Iterable, Iterator, List, Optional, Tuple, Union
from typing import Any, ClassVar, Dict, Iterable, Iterator, List, Optional, Tuple, Union

from semsimian import Semsimian

Expand All @@ -13,8 +13,11 @@
TermPairwiseSimilarity,
TermSetPairwiseSimilarity,
)
from oaklib.datamodels.vocabulary import OWL_THING
from oaklib.datamodels.vocabulary import OWL_THING, SearchType
from oaklib.implementations.sqldb.sql_implementation import SqlImplementation
from oaklib.interfaces.association_provider_interface import (
AssociationProviderInterface,
)
from oaklib.interfaces.basic_ontology_interface import BasicOntologyInterface
from oaklib.interfaces.obograph_interface import OboGraphInterface
from oaklib.interfaces.search_interface import SearchInterface
Expand All @@ -29,7 +32,9 @@


@dataclass
class SemSimianImplementation(SearchInterface, SemanticSimilarityInterface, OboGraphInterface):
class SemSimianImplementation(
SearchInterface, SemanticSimilarityInterface, OboGraphInterface, AssociationProviderInterface
):
"""Rust implementation of semantic similarity measures."""

delegated_methods: ClassVar[List[str]] = [
Expand Down Expand Up @@ -291,3 +296,54 @@ def _regain_element_formats(self, value: str) -> Union[str, float]:
except ValueError:
pass
return value

# TODO: search feature for the Monarch app
def associations_subject_search(
    self,
    subjects: Optional[Iterable[CURIE]] = None,
    predicates: Optional[Iterable[PRED_CURIE]] = None,
    objects: Optional[Iterable[CURIE]] = None,
    property_filter: Optional[Dict[PRED_CURIE, Any]] = None,
    subject_closure_predicates: Optional[List[PRED_CURIE]] = None,
    predicate_closure_predicates: Optional[List[PRED_CURIE]] = None,
    object_closure_predicates: Optional[List[PRED_CURIE]] = None,
    subject_prefixes: Optional[List[str]] = None,
    include_similarity_object: bool = False,
    method: Optional[str] = None,
    search_type: Optional[str] = SearchType.HYBRID.value,
    limit: Optional[int] = 10,
    sort_by_similarity: bool = True,
    **kwargs,
) -> Iterator[Tuple[float, Optional[TermSetPairwiseSimilarity], CURIE]]:
    """Search over all subjects in the association index.

    The search itself is delegated to semsimian's ``associations_search``;
    its result is returned unchanged.

    :param subjects: Collection of subject CURIEs to restrict the search to, defaults to None
    :param predicates: Collection of predicate CURIEs, forwarded as the first
        argument of semsimian's ``associations_search``, defaults to None
    :param objects: Collection of object CURIEs to search with; required by this implementation
    :param property_filter: accepted for interface compatibility; not used by this implementation
    :param subject_closure_predicates: accepted for interface compatibility; not used by this implementation
    :param predicate_closure_predicates: accepted for interface compatibility; not used by this implementation
    :param object_closure_predicates: closure to use over the ontology, defaults to None
    :param subject_prefixes: only consider subjects with these prefixes, defaults to None
    :param include_similarity_object: include the similarity object in the result, defaults to False
    :param method: accepted for interface compatibility; not used by this implementation
    :param search_type: Type of semsimian search to perform, given as a string or a
        :class:`SearchType` member; None falls back to HYBRID [Other choices: FULL, FLAT]
    :param limit: max number of results to return, defaults to 10
    :param sort_by_similarity: accepted for interface compatibility; not used by this implementation
    :raises TypeError: if ``objects`` is None
    :return: the result of semsimian's ``associations_search``; per the annotation,
        entries are (score, sim, subject)
    """
    # Fail fast with a readable message instead of the opaque
    # "'NoneType' object is not iterable" that set(None) would produce,
    # and do so before the semsimian object is built.
    if objects is None:
        raise TypeError(
            "associations_subject_search requires `objects`: an iterable of object CURIEs"
        )

    # The interface-level default for search_type is None; treat that as
    # "use this implementation's default" rather than forwarding None.
    if search_type is None:
        search_type = SearchType.HYBRID.value
    elif isinstance(search_type, SearchType):
        # Allow callers to pass the enum member directly.
        search_type = search_type.value

    semsimian = self._get_semsimian_object(
        predicates=object_closure_predicates,
        attributes=self.termset_pairwise_similarity_attributes,
    )
    subject_set = set(subjects) if subjects is not None else None

    return semsimian.associations_search(
        predicates,
        set(objects),
        include_similarity_object,
        search_type,
        subject_set,
        subject_prefixes,
        limit,
    )
1 change: 1 addition & 0 deletions src/oaklib/interfaces/association_provider_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ def associations_subject_search(
subject_prefixes: Optional[List[str]] = None,
include_similarity_object: bool = False,
method: Optional[str] = None,
search_type: Optional[str] = None,
limit: Optional[int] = 10,
sort_by_similarity: bool = True,
**kwargs,
Expand Down
2 changes: 1 addition & 1 deletion src/oaklib/utilities/lexical/lexical_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def lexical_index_to_sssom(
:param subjects: An optional collection of entities, if specified, then only subjects in this set are reported
:param objects: An optional collection of entities, if specified, then only objects in this set are reported
:param symmetric: If true, then mappings in either direction are reported
:param ensure_strict_prefixes: If true, prefixes & mappings in SSSOM MappingSetDataFrame will be filtred.
:param ensure_strict_prefixes: If true, prefixes & mappings in SSSOM MappingSetDataFrame will be filtered.
:return: SSSOM MappingSetDataFrame object.
"""
mappings = []
Expand Down
Binary file modified tests/input/go-nucleus.db
Binary file not shown.
54 changes: 53 additions & 1 deletion tests/test_implementations/test_semsimian_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from linkml_runtime.dumpers import yaml_dumper

from oaklib.datamodels.similarity import TermPairwiseSimilarity
from oaklib.datamodels.vocabulary import IS_A, PART_OF
from oaklib.datamodels.vocabulary import IS_A, PART_OF, SearchType
from oaklib.interfaces.semsim_interface import SemanticSimilarityInterface
from oaklib.selector import get_adapter
from tests import (
Expand Down Expand Up @@ -156,3 +156,55 @@ def test_semsimian_object_cache(self):

self.assertEqual(len(self.oi.semsimian_object_cache), 1)
self.assertTrue(time_taken_1 > time_taken_2)

def test_associations_search(self):
    """Run a FULL association search and check result count and top score."""
    assoc_predicate = {"biolink:has_nucleus"}
    object_terms = {"GO:0019222"}
    subject_prefixes = ["GO:"]
    limit = 10
    search_type = SearchType.FULL.value

    result = self.oi.associations_subject_search(
        object_closure_predicates=self.predicates,
        predicates=assoc_predicate,
        objects=object_terms,
        include_similarity_object=True,
        subject_prefixes=subject_prefixes,
        search_type=search_type,
        limit=limit,
    )

    self.assertEqual(len(result), limit)
    # The score is a float: compare with a tolerance (7 decimal places)
    # rather than requiring bit-for-bit equality.
    self.assertAlmostEqual(result[0][0], 2.9817189317909856, places=7)

def test_associations_cache(self):
    """Run the same association search twice; the second run must be faster.

    NOTE(review): the test name suggests the speed-up is expected to come
    from caching; the check itself is purely timing-based.
    """
    search_kwargs = dict(
        object_closure_predicates=self.predicates,
        predicates={"biolink:has_nucleus"},
        objects={"GO:0019222"},
        include_similarity_object=True,
        subject_prefixes=["GO:"],
        limit=10,
    )

    # Time two identical invocations back to back.
    durations = []
    for _ in range(2):
        began = timeit.default_timer()
        self.oi.associations_subject_search(**search_kwargs)
        durations.append(timeit.default_timer() - began)

    first_run, second_run = durations
    self.assertTrue(first_run > second_run)