
Commit

add retries for openai LLM rate limit errors (run-llama#11867)
logan-markewich authored Mar 12, 2024
1 parent 11578cf commit d33b789
Showing 3 changed files with 19 additions and 2 deletions.
@@ -46,6 +46,7 @@
 from llama_index.core.llms.llm import LLM
 from llama_index.core.types import BaseOutputParser, PydanticProgramMode
 from llama_index.llms.openai.utils import (
+    create_retry_decorator,
     from_openai_message,
     is_chat_model,
     is_function_calling_model,
@@ -64,6 +65,14 @@

 DEFAULT_OPENAI_MODEL = "gpt-3.5-turbo"

+llm_retry_decorator = create_retry_decorator(
+    max_retries=6,
+    random_exponential=True,
+    stop_after_delay_seconds=60,
+    min_seconds=1,
+    max_seconds=20,
+)
+

 @runtime_checkable
 class Tokenizer(Protocol):
@@ -290,6 +299,7 @@ def _get_model_kwargs(self, **kwargs: Any) -> Dict[str, Any]:
             base_kwargs["max_tokens"] = self.max_tokens
         return {**base_kwargs, **self.additional_kwargs}

+    @llm_retry_decorator
     def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
         client = self._get_client()
         message_dicts = to_openai_message_dicts(messages)
@@ -353,6 +363,7 @@ def _update_tool_calls(
             t.id += tc_delta.id or ""
         return tool_calls

+    @llm_retry_decorator
     def _stream_chat(
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseGen:
@@ -405,6 +416,7 @@ def gen() -> ChatResponseGen:

         return gen()

+    @llm_retry_decorator
     def _complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
         client = self._get_client()
         all_kwargs = self._get_model_kwargs(**kwargs)
@@ -422,6 +434,7 @@ def _complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
             additional_kwargs=self._get_response_token_counts(response),
         )

+    @llm_retry_decorator
     def _stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
         client = self._get_client()
         all_kwargs = self._get_model_kwargs(**kwargs)
@@ -530,6 +543,7 @@ async def astream_complete(
             astream_complete_fn = self._astream_complete
         return await astream_complete_fn(prompt, **kwargs)

+    @llm_retry_decorator
     async def _achat(
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponse:
@@ -547,6 +561,7 @@ async def _achat(
             additional_kwargs=self._get_response_token_counts(response),
         )

+    @llm_retry_decorator
     async def _astream_chat(
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseAsyncGen:
@@ -610,6 +625,7 @@ async def gen() -> ChatResponseAsyncGen:

         return gen()

+    @llm_retry_decorator
     async def _acomplete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
         aclient = self._get_aclient()
         all_kwargs = self._get_model_kwargs(**kwargs)
@@ -627,6 +643,7 @@ async def _acomplete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
             additional_kwargs=self._get_response_token_counts(response),
         )

+    @llm_retry_decorator
     async def _astream_complete(
         self, prompt: str, **kwargs: Any
     ) -> CompletionResponseAsyncGen:
@@ -135,7 +135,7 @@ def create_retry_decorator(
     random_exponential: bool = False,
     stop_after_delay_seconds: Optional[float] = None,
     min_seconds: float = 4,
-    max_seconds: float = 10,
+    max_seconds: float = 60,
 ) -> Callable[[Any], Any]:
     wait_strategy = (
         wait_random_exponential(min=min_seconds, max=max_seconds)
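For context, the wait_random_exponential call and the signature above indicate that create_retry_decorator is built on tenacity. Below is a minimal sketch of how such a factory might be assembled; the fallback wait strategy and the exact set of retried exception types are assumptions for illustration, not code copied from llama-index.

from typing import Any, Callable, Optional

import openai
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    stop_after_delay,
    wait_exponential,
    wait_random_exponential,
)


def make_retry_decorator(
    max_retries: int,
    random_exponential: bool = False,
    stop_after_delay_seconds: Optional[float] = None,
    min_seconds: float = 4,
    max_seconds: float = 60,
) -> Callable[[Any], Any]:
    # Exponential backoff, with jitter when random_exponential=True.
    wait_strategy = (
        wait_random_exponential(min=min_seconds, max=max_seconds)
        if random_exponential
        else wait_exponential(multiplier=1, min=min_seconds, max=max_seconds)
    )
    # Give up after max_retries attempts, or earlier if a total delay budget is set.
    stop_strategy = stop_after_attempt(max_retries)
    if stop_after_delay_seconds is not None:
        stop_strategy = stop_strategy | stop_after_delay(stop_after_delay_seconds)
    return retry(
        reraise=True,
        stop=stop_strategy,
        wait=wait_strategy,
        # Assumed set of transient OpenAI errors worth retrying.
        retry=retry_if_exception_type(
            (
                openai.RateLimitError,
                openai.APITimeoutError,
                openai.APIConnectionError,
            )
        ),
    )

With the values passed to llm_retry_decorator above (max_retries=6, random_exponential=True, stop_after_delay_seconds=60, min_seconds=1, max_seconds=20), a rate-limited call would be retried with jittered backoff of 1 to 20 seconds, for at most 6 attempts or 60 seconds overall.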
@@ -29,7 +29,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-llms-openai"
 readme = "README.md"
-version = "0.1.7"
+version = "0.1.8"

 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
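Because the decorator wraps _chat, _complete, and their streaming and async counterparts, retries happen transparently inside the LLM and no caller-side changes are needed. A hypothetical usage sketch (model name and prompt are placeholders):

from llama_index.llms.openai import OpenAI

llm = OpenAI(model="gpt-3.5-turbo")
# If the OpenAI API answers with a rate limit error, the decorated _complete
# call backs off and retries per the llm_retry_decorator configuration above
# before any exception is surfaced to the caller.
response = llm.complete("Hello!")
print(response)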