Commit

format
cmungall committed Oct 22, 2024
1 parent fb327b4 commit 0bd6e67
Showing 3 changed files with 25 additions and 24 deletions.
1 change: 0 additions & 1 deletion src/oaklib/cli.py
@@ -36,7 +36,6 @@
from prefixmaps.io.parser import load_multi_context
from pydantic import BaseModel
from sssom.parsers import parse_sssom_table, to_mapping_set_document
from tornado.gen import multi

import oaklib.datamodels.taxon_constraints as tcdm
from oaklib import datamodels
2 changes: 0 additions & 2 deletions src/oaklib/implementations/cx/cx_implementation.py
@@ -73,5 +73,3 @@ def __post_init__(self):
locator = path
cx = ndex2.create_nice_cx_from_file(path)
self.obograph_document = from_cx(cx)


46 changes: 25 additions & 21 deletions src/oaklib/implementations/llm_implementation.py
@@ -6,7 +6,7 @@
import re
import time
from dataclasses import dataclass
from typing import TYPE_CHECKING, Dict, Iterable, Iterator, List, Optional, Tuple, Any
from typing import TYPE_CHECKING, Any, Dict, Iterable, Iterator, List, Optional, Tuple

import pystow
from linkml_runtime.dumpers import yaml_dumper
@@ -236,7 +236,9 @@ def __post_init__(self):
def _embeddings_collection_name(self) -> str:
name = self.wrapped_adapter.resource.slug
if not name:
raise ValueError(f"Wrapped adapter must have a slug: {self.wrapped_adapter} // {self.wrapped_adapter.resource}")
raise ValueError(
f"Wrapped adapter must have a slug: {self.wrapped_adapter} // {self.wrapped_adapter.resource}"
)
return name

def entities(self, **kwargs) -> Iterator[CURIE]:
@@ -281,7 +283,6 @@ def _parse_response(self, json_str: str) -> Any:
json_str = json_str[4:].strip()
return json.loads(json_str)


def get_model(self):
model = self.model
if not self.model:
@@ -297,6 +298,7 @@ def get_model(self):
def _embed_terms(self):
import llm
import sqlite_utils

adapter = self.wrapped_adapter
name = self._embeddings_collection_name
path_to_db = pystow.join("oaklib", "llm", "embeddings")
@@ -308,14 +310,13 @@

def _term_embedding(self, id: CURIE) -> Optional[tuple]:
import llm

db = self._embeddings_collection.db
name = self._embeddings_collection_name
collection_ids = list(db["collections"].rows_where("name = ?", (name,)))
collection_id = collection_ids[0]["id"]
matches = list(
db["embeddings"].rows_where(
"collection_id = ? and id = ?", (collection_id, id)
)
db["embeddings"].rows_where("collection_id = ? and id = ?", (collection_id, id))
)
if not matches:
logger.debug(f"ID not found: {id} in {collection_id} ({name})")
@@ -324,18 +325,18 @@ def _term_embedding(self, id: CURIE) -> Optional[tuple]:
comparison_vector = llm.decode(embedding)
return comparison_vector


def pairwise_similarity(
self,
subject: CURIE,
object: CURIE,
predicates: List[PRED_CURIE] = None,
subject_ancestors: List[CURIE] = None,
object_ancestors: List[CURIE] = None,
min_jaccard_similarity: Optional[float] = None,
min_ancestor_information_content: Optional[float] = None,
self,
subject: CURIE,
object: CURIE,
predicates: List[PRED_CURIE] = None,
subject_ancestors: List[CURIE] = None,
object_ancestors: List[CURIE] = None,
min_jaccard_similarity: Optional[float] = None,
min_ancestor_information_content: Optional[float] = None,
) -> Optional[TermPairwiseSimilarity]:
import llm

self._embed_terms()
subject_embedding = self._term_embedding(subject)
if not subject_embedding:
@@ -351,7 +352,9 @@ def pairwise_similarity(
)
return sim

def _ground_term(self, term: str, categories: Optional[List[str]] = None) -> Optional[Tuple[str, float]]:
def _ground_term(
self, term: str, categories: Optional[List[str]] = None
) -> Optional[Tuple[str, float]]:
matches = list(self._match_terms(term))
system = """
Given a list of ontology terms, find the one that best matches the given term.
@@ -361,7 +364,7 @@ def _ground_term(self, term: str, categories: Optional[List[str]] = None) -> Opt
- ANAT:002 pericardium
Then a valid response is {"id": "ANAT:001", "confidence": 0.8}.
"""
prompt = f"Find the best match for the term: \"{term}\".\n"
prompt = f'Find the best match for the term: "{term}".\n'
if categories:
if len(categories) == 1:
prompt += f"Term Category: {categories[0]}.\n"
@@ -401,7 +404,11 @@ def annotate_text(
grounded, _confidence = self._ground_term(text, configuration.categories)
logger.info(f"Grounded {text} to {grounded}")
if grounded:
yield TextAnnotation(subject_label=text, object_id=grounded, object_label=self.wrapped_adapter.label(grounded))
yield TextAnnotation(
subject_label=text,
object_id=grounded,
object_label=self.wrapped_adapter.label(grounded),
)
return
else:
logging.info("Delegating directly to grounder, bypassing LLM")
@@ -495,9 +502,6 @@ def _match_terms(self, text: str) -> Iterator[Tuple[str, float]]:
logger.debug(f"Similar: {entry}")
yield entry.id, entry.score




def _suggest_aliases(
self,
term: str,
