Support retry policy for completion / acompletion #6916
base: main
Conversation
try:
The majority of the diff in wrapper below this line is just un-indenting the try / catch block now that retry logic from Router is used instead. You can ignore everything except https://github.com/BerriAI/litellm/pull/6916/files#r1860469861 until https://github.com/BerriAI/litellm/pull/6916/files#r1858292742
litellm/utils.py
Outdated
except Exception as e:
    call_type = original_function.__name__
    if call_type == CallTypes.completion.value:
        num_retries = (
            kwargs.get("num_retries", None) or litellm.num_retries or None
        )
        litellm.num_retries = (
            None  # set retries to None to prevent infinite loops
        )
        context_window_fallback_dict = kwargs.get(
            "context_window_fallback_dict", {}
        )
        _is_litellm_router_call = "model_group" in kwargs.get(
            "metadata", {}
        )  # check if call from litellm.router/proxy
        if (
            num_retries and not _is_litellm_router_call
        ):  # only enter this if call is not from litellm router/proxy. router has its own logic for retrying
            if (
                isinstance(e, openai.APIError)
                or isinstance(e, openai.Timeout)
                or isinstance(e, openai.APIConnectionError)
            ):
                kwargs["num_retries"] = num_retries
                return litellm.completion_with_retries(*args, **kwargs)
        elif (
            isinstance(e, litellm.exceptions.ContextWindowExceededError)
            and context_window_fallback_dict
            and model in context_window_fallback_dict
            and not _is_litellm_router_call
        ):
            if len(args) > 0:
                args[0] = context_window_fallback_dict[model]  # type: ignore
            else:
                kwargs["model"] = context_window_fallback_dict[model]
            return original_function(*args, **kwargs)
    traceback_exception = traceback.format_exc()
    end_time = datetime.datetime.now()

    # LOG FAILURE - handle streaming failure logging in the _next_ object, remove `handle_failure` once it's deprecated
    if logging_obj:
        logging_obj.failure_handler(
            e, traceback_exception, start_time, end_time
        )  # DO NOT MAKE THREADED - router retry fallback relies on this!
    raise e
This logic has now been removed, since we're now reusing the retry logic from Router.
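For reference, a hedged sketch of how callers used the fallback dict consumed by the removed block above; the dict shape matches the code, but the model names and prompt size are illustrative:

```python
import litellm

# If the primary model raises ContextWindowExceededError, the old wrapper
# logic retried the call on the mapped larger-context model.
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "word " * 8_000}],  # overflows a 4k window
    context_window_fallback_dict={"gpt-3.5-turbo": "gpt-3.5-turbo-16k"},
)
```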
except Exception as e:
    traceback_exception = traceback.format_exc()
    end_time = datetime.datetime.now()
    if logging_obj:
        try:
            logging_obj.failure_handler(
                e, traceback_exception, start_time, end_time
            )  # DO NOT MAKE THREADED - router retry fallback relies on this!
        except Exception as e:
            raise e
        try:
            await logging_obj.async_failure_handler(
                e, traceback_exception, start_time, end_time
            )
        except Exception as e:
            raise e

    call_type = original_function.__name__
    if call_type == CallTypes.acompletion.value:
        num_retries = (
            kwargs.get("num_retries", None) or litellm.num_retries or None
        )
        litellm.num_retries = (
            None  # set retries to None to prevent infinite loops
        )
        context_window_fallback_dict = kwargs.get(
            "context_window_fallback_dict", {}
        )

        _is_litellm_router_call = "model_group" in kwargs.get(
            "metadata", {}
        )  # check if call from litellm.router/proxy
        if (
            num_retries and not _is_litellm_router_call
        ):  # only enter this if call is not from litellm router/proxy. router has its own logic for retrying
            try:
                kwargs["num_retries"] = num_retries
                kwargs["original_function"] = original_function
                if isinstance(
                    e, openai.RateLimitError
                ):  # rate limiting specific error
                    kwargs["retry_strategy"] = "exponential_backoff_retry"
                elif isinstance(e, openai.APIError):  # generic api error
                    kwargs["retry_strategy"] = "constant_retry"
                return await litellm.acompletion_with_retries(*args, **kwargs)
            except Exception:
                pass
        elif (
            isinstance(e, litellm.exceptions.ContextWindowExceededError)
            and context_window_fallback_dict
            and model in context_window_fallback_dict
        ):
            if len(args) > 0:
                args[0] = context_window_fallback_dict[model]  # type: ignore
            else:
                kwargs["model"] = context_window_fallback_dict[model]
            return await original_function(*args, **kwargs)
    raise e
This logic has now been removed, since we're now reusing the retry logic from Router.
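For reference, a minimal sketch of the two delay schedules named by the removed retry_strategy values; this is illustrative, not litellm's exact implementation:

```python
import random

def retry_delay(attempt: int, strategy: str, base: float = 1.0, cap: float = 60.0) -> float:
    # "constant_retry": the same wait before every attempt
    if strategy == "constant_retry":
        return base
    # "exponential_backoff_retry": double the wait each attempt, jittered and capped
    if strategy == "exponential_backoff_retry":
        return min(cap, base * (2 ** attempt) * random.uniform(0.5, 1.5))
    raise ValueError(f"unknown retry strategy: {strategy}")
```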
):
    raise ValueError("model param not passed in.")

try:
The majority of the diff in wrapper below this line is just un-indenting the try / catch block now that retry logic from Router is used instead. You can ignore everything except https://github.com/BerriAI/litellm/pull/6916/files#r1860468305 until https://github.com/BerriAI/litellm/pull/6916/files#r1858295149
litellm/main.py
Outdated
@@ -372,6 +372,8 @@ async def acompletion(
    LITELLM Specific Params
    mock_response (str, optional): If provided, return a mock completion response for testing or debugging purposes (default is None).
    custom_llm_provider (str, optional): Used for Non-OpenAI LLMs, Example usage for bedrock, set model="amazon.titan-tg1-large" and custom_llm_provider="bedrock"
    max_retries (int, optional): The number of retries to attempt (default is 0).
acompletion supports max_retries already today; it just wasn't documented here.
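For example, a minimal call passing the already-supported parameter (model name illustrative):

```python
import asyncio
import litellm

async def main():
    response = await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello"}],
        max_retries=2,  # forwarded to the underlying provider client
    )
    print(response.choices[0].message.content)

asyncio.run(main())
```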
verbose_router_logger.debug(
    f"async function w/ retries: original_function - {original_function}, num_retries - {num_retries}"
)
try:
All of the deleted code below was moved into a utils.py file so that it can be shared with wrapper / wrapper_async, which power the completion() and acompletion() APIs.
@@ -0,0 +1,394 @@
import asyncio
The contents of this file are moved from router.py: https://github.com/BerriAI/litellm/pull/6916/files#r1858305693. There aren't any other notable changes (preexisting logic is preserved).
@krrishdholakia Can you take a look at this PR?
num_retries = _get_and_reset_retries_for_wrapper_call(kwargs)
result = await async_run_with_retries(
    original_function=original_function,
    original_function_args=args,
    original_function_kwargs=kwargs,
    num_retries=num_retries,
    retry_after=0,
    retry_policy=kwargs.get("retry_policy"),
    fallbacks=kwargs.get("fallbacks", []),
    context_window_fallbacks=kwargs.get("context_window_fallback_dict", {}).get(
        model, []
    ),
    content_policy_fallbacks=[],
    get_healthy_deployments=lambda *args, **kwargs: _get_mock_healthy_deployments(
        model
    ),
    log_retry=lambda kwargs, e: kwargs,
    model_list=[],
)
Updated model call to perform retries
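Inferred from the call site above, the shared helper has roughly the following shape; only the parameter names come from the diff, and the body is a paraphrase that omits the per-exception policy and fallback handling:

```python
import asyncio

async def async_run_with_retries(
    *,
    original_function,
    original_function_args,
    original_function_kwargs,
    num_retries: int,
    retry_after: float,
    retry_policy=None,
    fallbacks=None,
    context_window_fallbacks=None,
    content_policy_fallbacks=None,
    get_healthy_deployments=None,
    log_retry=None,
    model_list=None,
):
    # Sketch only: retry the wrapped call up to num_retries times. The real
    # helper also consults retry_policy per exception type, waits on healthy
    # deployments, and walks the fallback lists before giving up.
    for attempt in range(num_retries + 1):
        try:
            return await original_function(
                *original_function_args, **original_function_kwargs
            )
        except Exception as e:
            if attempt == num_retries:
                raise
            if log_retry is not None:
                original_function_kwargs = log_retry(original_function_kwargs, e)
            await asyncio.sleep(retry_after)
```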
result = run_with_retries(
    original_function=original_function,
    original_function_args=args,
    original_function_kwargs=kwargs,
    num_retries=num_retries,
    retry_after=0,
    retry_policy=kwargs.get("retry_policy"),
    fallbacks=kwargs.get("fallbacks", []),
    context_window_fallbacks=kwargs.get("context_window_fallback_dict", {}).get(
        model, []
    ),
    content_policy_fallbacks=[],
    get_healthy_deployments=lambda *args, **kwargs: _get_mock_healthy_deployments(
        model
    ),
    log_retry=lambda kwargs, e: kwargs,
    model_list=[],
)
Updated model call to perform retries
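One design note: outside the Router there are no real deployments, so both call sites above pass a get_healthy_deployments stub. A guess at its shape based only on its usage in the diff; the helper name comes from the diff, but the return format is an assumption:

```python
def _get_mock_healthy_deployments(model: str) -> list:
    # Router's retry loop only needs to see at least one "healthy" deployment
    # to keep retrying, so a single synthetic entry for the model suffices.
    return [{"model_name": model}]
```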
Title
Support retry policy for completion / acompletion
Relevant issues
Fixes #6623
Type
🆕 New Feature
Changes
Moves policy-based retry logic from Router into a utils file, which is then called by Router and wrapper / async_wrapper
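With this change, callers should be able to pass Router-style retry policies straight to the top-level APIs; a hedged sketch follows (import path and field names per litellm's RetryPolicy type; retry counts and model name are illustrative):

```python
import litellm
from litellm.types.router import RetryPolicy  # import path may vary by version

retry_policy = RetryPolicy(
    TimeoutErrorRetries=2,
    RateLimitErrorRetries=3,
    ContentPolicyViolationErrorRetries=0,
)

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
    num_retries=3,
    retry_policy=retry_policy,
)
```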
[REQUIRED] Testing - Attach a screenshot of any new tests passing locally
If UI changes, send a screenshot/GIF of working UI fixes
I also repeated this exercise with acompletion: