
Commit

add retries for openai LLM rate limit errors (run-llama#11867)
logan-markewich authored Mar 12, 2024
1 parent 11578cf commit d33b789
Showing 3 changed files with 19 additions and 2 deletions.
@@ -46,6 +46,7 @@
 from llama_index.core.llms.llm import LLM
 from llama_index.core.types import BaseOutputParser, PydanticProgramMode
 from llama_index.llms.openai.utils import (
+    create_retry_decorator,
     from_openai_message,
     is_chat_model,
     is_function_calling_model,
@@ -64,6 +65,14 @@

 DEFAULT_OPENAI_MODEL = "gpt-3.5-turbo"

+llm_retry_decorator = create_retry_decorator(
+    max_retries=6,
+    random_exponential=True,
+    stop_after_delay_seconds=60,
+    min_seconds=1,
+    max_seconds=20,
+)
+

 @runtime_checkable
 class Tokenizer(Protocol):
@@ -290,6 +299,7 @@ def _get_model_kwargs(self, **kwargs: Any) -> Dict[str, Any]:
             base_kwargs["max_tokens"] = self.max_tokens
         return {**base_kwargs, **self.additional_kwargs}

+    @llm_retry_decorator
     def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
         client = self._get_client()
         message_dicts = to_openai_message_dicts(messages)
@@ -353,6 +363,7 @@ def _update_tool_calls(
             t.id += tc_delta.id or ""
         return tool_calls

+    @llm_retry_decorator
     def _stream_chat(
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseGen:
@@ -405,6 +416,7 @@ def gen() -> ChatResponseGen:

         return gen()

+    @llm_retry_decorator
     def _complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
         client = self._get_client()
         all_kwargs = self._get_model_kwargs(**kwargs)
@@ -422,6 +434,7 @@ def _complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
             additional_kwargs=self._get_response_token_counts(response),
         )

+    @llm_retry_decorator
     def _stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
         client = self._get_client()
         all_kwargs = self._get_model_kwargs(**kwargs)
@@ -530,6 +543,7 @@ async def astream_complete(
             astream_complete_fn = self._astream_complete
         return await astream_complete_fn(prompt, **kwargs)

+    @llm_retry_decorator
     async def _achat(
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponse:
@@ -547,6 +561,7 @@ async def _achat(
             additional_kwargs=self._get_response_token_counts(response),
         )

+    @llm_retry_decorator
     async def _astream_chat(
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseAsyncGen:
@@ -610,6 +625,7 @@ async def gen() -> ChatResponseAsyncGen:

         return gen()

+    @llm_retry_decorator
     async def _acomplete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
         aclient = self._get_aclient()
         all_kwargs = self._get_model_kwargs(**kwargs)
@@ -627,6 +643,7 @@ async def _acomplete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
             additional_kwargs=self._get_response_token_counts(response),
         )

+    @llm_retry_decorator
     async def _astream_complete(
         self, prompt: str, **kwargs: Any
     ) -> CompletionResponseAsyncGen:
@@ -135,7 +135,7 @@ def create_retry_decorator(
     random_exponential: bool = False,
     stop_after_delay_seconds: Optional[float] = None,
     min_seconds: float = 4,
-    max_seconds: float = 10,
+    max_seconds: float = 60,
 ) -> Callable[[Any], Any]:
     wait_strategy = (
         wait_random_exponential(min=min_seconds, max=max_seconds)
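For context, the wait_random_exponential call and the signature above indicate that create_retry_decorator is built on tenacity. Below is a minimal sketch of how such a factory might be assembled; the fallback wait strategy and the exact set of retried exception types are assumptions for illustration, not code copied from llama-index.

from typing import Any, Callable, Optional

import openai
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    stop_after_delay,
    wait_exponential,
    wait_random_exponential,
)


def make_retry_decorator(
    max_retries: int,
    random_exponential: bool = False,
    stop_after_delay_seconds: Optional[float] = None,
    min_seconds: float = 4,
    max_seconds: float = 60,
) -> Callable[[Any], Any]:
    # Exponential backoff, with jitter when random_exponential=True.
    wait_strategy = (
        wait_random_exponential(min=min_seconds, max=max_seconds)
        if random_exponential
        else wait_exponential(multiplier=1, min=min_seconds, max=max_seconds)
    )
    # Give up after max_retries attempts, or earlier if a total delay budget is set.
    stop_strategy = stop_after_attempt(max_retries)
    if stop_after_delay_seconds is not None:
        stop_strategy = stop_strategy | stop_after_delay(stop_after_delay_seconds)
    return retry(
        reraise=True,
        stop=stop_strategy,
        wait=wait_strategy,
        # Assumed set of transient OpenAI errors worth retrying.
        retry=retry_if_exception_type(
            (
                openai.RateLimitError,
                openai.APITimeoutError,
                openai.APIConnectionError,
            )
        ),
    )

With the values passed to llm_retry_decorator above (max_retries=6, random_exponential=True, stop_after_delay_seconds=60, min_seconds=1, max_seconds=20), a rate-limited call would be retried with jittered backoff of 1 to 20 seconds, for at most 6 attempts or 60 seconds overall.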
@@ -29,7 +29,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-llms-openai"
 readme = "README.md"
-version = "0.1.7"
+version = "0.1.8"

 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
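Because the decorator wraps _chat, _complete, and their streaming and async counterparts, retries happen transparently inside the LLM and no caller-side changes are needed. A hypothetical usage sketch (model name and prompt are placeholders):

from llama_index.llms.openai import OpenAI

llm = OpenAI(model="gpt-3.5-turbo")
# If the OpenAI API answers with a rate limit error, the decorated _complete
# call backs off and retries per the llm_retry_decorator configuration above
# before any exception is surfaced to the caller.
response = llm.complete("Hello!")
print(response)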