llm exception handling #711

Merged · 3 commits · Mar 12, 2024
31 changes: 31 additions & 0 deletions docs/howtos/use-llms.rst
@@ -9,6 +9,10 @@ of tasks, including:
- Annotating text using ontology terms
- Reviewing ontology branches or different kinds of ontology axioms

For more on LLMs, see:

- `Google LLM guide <https://ai.google.dev/docs/concepts>`_

This guide is in 3 sections:

- Summary of ontology-LLM tools that directly leverage OAK, but are not part of the OAK framework
@@ -135,6 +139,33 @@ mechanism for adding new models. See `Plugin index <https://llm.datasette.io/en/
However, LLM can sometimes be slow to add new models, so here it can be useful to use the awesome
`LiteLLM <https://github.com/BerriAI/litellm/>`_ package, which provides a proxy to a wide range of models.
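
For example, a minimal sketch of this pattern (illustrative only; the exact CLI flags, default
port, and the ``ollama/mixtral`` model name below depend on your LiteLLM and Ollama setup) is:

.. code-block:: bash

    pip install 'litellm[proxy]'
    # start an OpenAI-compatible proxy in front of a locally served model
    litellm --model ollama/mixtral
    # any OpenAI-compatible client, including llm, can then be pointed at
    # the proxy's local URL rather than at the upstream provider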

Installing LLM plugins
~~~~~~~~~~~~~~~~~~~~~~

The ``llm`` command line tool makes it easy to access other models via its
`extensible plugin system <https://llm.datasette.io/en/stable/plugins/index.html>`_.

Normally, you would do something like this:

.. code-block:: bash

    pipx install llm
    llm install llm-gemini
    llm -m gemini-pro "what is the best ontology?"

However, this will install the plugin in a different environment from OAK. If you are running OAK
as a developer, then you can do this:

.. code-block:: bash

    cd ontology-access-kit
    poetry run llm install llm-gemini

This will install the plugin in the same environment as OAK.
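
To confirm the plugin is visible from OAK's environment (assuming a recent ``llm`` release),
you can list installed plugins and available models:

.. code-block:: bash

    poetry run llm plugins
    poetry run llm models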

TODO: instructions for non-developers.


Mixtral via Ollama and LiteLLM
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

17 changes: 16 additions & 1 deletion poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -40,6 +40,7 @@ semsimian = "^0.2.1"
urllib3 = {version = "< 2", optional = true}
pydantic = "*"
jsonlines = "^4.0.0"
tenacity = "^8.2.3"

[tool.poetry.dev-dependencies]
pytest = "^7.1.3"
43 changes: 41 additions & 2 deletions src/oaklib/implementations/llm_implementation.py
@@ -3,10 +3,17 @@
import json
import logging
import re
import time
from dataclasses import dataclass
from typing import TYPE_CHECKING, Dict, Iterable, Iterator, List, Optional, Tuple

from sssom_schema import Mapping
from tenacity import (
    retry,
    retry_if_exception,
    stop_after_attempt,
    wait_random_exponential,
)

from oaklib import BasicOntologyInterface
from oaklib.datamodels.obograph import DefinitionPropertyValue
@@ -69,6 +76,28 @@ def _prefix(curie: CURIE) -> str:
    return curie.split(":")[0].lower()


def is_rate_limit_error(exception):
    # List of fully qualified names of RateLimitError exceptions from various libraries
    rate_limit_errors = [
        "openai.error.RateLimitError",
        "anthropic.error.RateLimitError",
        # Add more as needed
    ]
    exception_full_name = f"{exception.__class__.__module__}.{exception.__class__.__name__}"
    logger.warning(f"Exception_full_name: {exception_full_name}")
    logger.warning(f"Exception: {exception}")
    return exception_full_name in rate_limit_errors


@retry(
    retry=retry_if_exception(is_rate_limit_error),
    wait=wait_random_exponential(multiplier=1, max=40),
    stop=stop_after_attempt(3),
)
def query_model(model, *args, **kwargs):
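    """Send a prompt to the model, retrying when a provider rate-limit error is raised.

    Retries use random exponential backoff (capped at 40 seconds between attempts)
    and stop after 3 attempts, at which point tenacity raises a RetryError.
    """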
    return model.prompt(*args, **kwargs)


@dataclass
class LLMImplementation(
OboGraphInterface,
@@ -104,6 +133,8 @@ class LLMImplementation(

max_recursion_depth: int = 0

throttle_time: float = 0.0

def __post_init__(self):
slug = self.resource.slug
if not slug:
@@ -127,6 +158,11 @@ def __post_init__(self):
import llm

self.model = llm.get_model(self.model_id)
if "claude" in self.model_id or "openrouter" in self.model_id:
# TODO: claude API seems to have its own rate limiting
# TODO: openrouter just seems very flaky
# but it is too conservative
self.throttle_time = 10

def entities(self, **kwargs) -> Iterator[CURIE]:
"""Return all entities in the ontology."""
@@ -374,7 +410,9 @@ def _get_description(entity: CURIE) -> Optional[str]:
)
logger.debug(f"System: {system_prompt}")
logger.info(f"Prompt: {main_prompt}")
response = model.prompt(main_prompt, system=system_prompt).text()
time.sleep(self.throttle_time)
response = query_model(model, main_prompt, system=system_prompt).text()
# response = model.prompt(main_prompt, system=system_prompt).text()
logger.info(f"Response: {response}")
try:
obj = json.loads(response)
@@ -385,7 +423,8 @@
extra += f"This resulted in: {e}\n"
extra += "Please try again, WITH VALID JSON."
extra += "Do not apologize or give more verbiage, JUST JSON."
response = model.prompt(main_prompt + extra, system=system_prompt).text()
logger.info(f"New Prompt: {main_prompt + extra}")
response = query_model(model, main_prompt + extra, system=system_prompt).text()
try:
obj = json.loads(response)
except json.JSONDecodeError as e: