From 49be573ce46d8e0ceb72621c6040b4f78e63bafc Mon Sep 17 00:00:00 2001 From: dbczumar Date: Mon, 25 Nov 2024 20:42:11 -0800 Subject: [PATCH 01/27] fix Signed-off-by: dbczumar --- litellm/router.py | 241 +++----------------- litellm/router_utils/retry_utils.py | 335 ++++++++++++++++++++++++++++ 2 files changed, 363 insertions(+), 213 deletions(-) create mode 100644 litellm/router_utils/retry_utils.py diff --git a/litellm/router.py b/litellm/router.py index f724c96c4e1e..14ccc1bc2e0c 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -86,6 +86,11 @@ async_raise_no_deployment_exception, send_llm_exception_alert, ) +from litellm.router_utils.retry_utils import ( + handle_mock_testing_rate_limit_error, + run_async_with_retries, + should_retry_this_error, +) from litellm.router_utils.router_callbacks.track_deployment_metrics import ( increment_deployment_failures_for_current_minute, increment_deployment_successes_for_current_minute, @@ -2878,206 +2883,30 @@ async def async_function_with_retries(self, *args, **kwargs): # noqa: PLR0915 if model_list is not None: _metadata.update({"model_group_size": len(model_list)}) - verbose_router_logger.debug( - f"async function w/ retries: original_function - {original_function}, num_retries - {num_retries}" - ) - try: - self._handle_mock_testing_rate_limit_error( - model_group=model_group, kwargs=kwargs - ) - # if the function call is successful, no exception will be raised and we'll break out of the loop - response = await self.make_call(original_function, *args, **kwargs) - - return response - except Exception as e: - current_attempt = None - original_exception = e - - """ - Retry Logic - """ - _healthy_deployments, _all_deployments = ( - await self._async_get_healthy_deployments( - model=kwargs.get("model") or "", - parent_otel_span=parent_otel_span, - ) - ) - - # raises an exception if this error should not be retries - self.should_retry_this_error( - error=e, - healthy_deployments=_healthy_deployments, - all_deployments=_all_deployments, - context_window_fallbacks=context_window_fallbacks, - regular_fallbacks=fallbacks, - content_policy_fallbacks=content_policy_fallbacks, - ) - - if ( - self.retry_policy is not None - or self.model_group_retry_policy is not None - ): - # get num_retries from retry policy - _retry_policy_retries = self.get_num_retries_from_retry_policy( - exception=original_exception, model_group=kwargs.get("model") - ) - if _retry_policy_retries is not None: - num_retries = _retry_policy_retries - ## LOGGING - if num_retries > 0: - kwargs = self.log_retry(kwargs=kwargs, e=original_exception) - else: - raise - - # decides how long to sleep before retry - retry_after = self._time_to_sleep_before_retry( - e=original_exception, - remaining_retries=num_retries, - num_retries=num_retries, - healthy_deployments=_healthy_deployments, - ) - - await asyncio.sleep(retry_after) - for current_attempt in range(num_retries): - try: - # if the function call is successful, no exception will be raised and we'll break out of the loop - response = await self.make_call(original_function, *args, **kwargs) - if inspect.iscoroutinefunction( - response - ): # async errors are often returned as coroutines - response = await response - return response - - except Exception as e: - ## LOGGING - kwargs = self.log_retry(kwargs=kwargs, e=e) - remaining_retries = num_retries - current_attempt - _model: Optional[str] = kwargs.get("model") # type: ignore - if _model is not None: - _healthy_deployments, _ = ( - await self._async_get_healthy_deployments( - 
model=_model, - parent_otel_span=parent_otel_span, - ) - ) - else: - _healthy_deployments = [] - _timeout = self._time_to_sleep_before_retry( - e=original_exception, - remaining_retries=remaining_retries, - num_retries=num_retries, - healthy_deployments=_healthy_deployments, - ) - await asyncio.sleep(_timeout) - - if type(original_exception) in litellm.LITELLM_EXCEPTION_TYPES: - setattr(original_exception, "max_retries", num_retries) - setattr(original_exception, "num_retries", current_attempt) - - raise original_exception - - async def make_call(self, original_function: Any, *args, **kwargs): - """ - Handler for making a call to the .completion()/.embeddings()/etc. functions. - """ - model_group = kwargs.get("model") - response = original_function(*args, **kwargs) - if inspect.iscoroutinefunction(response) or inspect.isawaitable(response): - response = await response - ## PROCESS RESPONSE HEADERS - await self.set_response_headers(response=response, model_group=model_group) - - return response - - def _handle_mock_testing_rate_limit_error( - self, kwargs: dict, model_group: Optional[str] = None - ): - """ - Helper function to raise a mock litellm.RateLimitError error for testing purposes. - - Raises: - litellm.RateLimitError error when `mock_testing_rate_limit_error=True` passed in request params - """ - mock_testing_rate_limit_error: Optional[bool] = kwargs.pop( - "mock_testing_rate_limit_error", None - ) + retry_policy = self.retry_policy if ( - mock_testing_rate_limit_error is not None - and mock_testing_rate_limit_error is True - ): - verbose_router_logger.info( - f"litellm.router.py::_mock_rate_limit_error() - Raising mock RateLimitError for model={model_group}" - ) - raise litellm.RateLimitError( - model=model_group, - llm_provider="", - message=f"This is a mock exception for model={model_group}, to trigger a rate limit error.", - ) - - def should_retry_this_error( - self, - error: Exception, - healthy_deployments: Optional[List] = None, - all_deployments: Optional[List] = None, - context_window_fallbacks: Optional[List] = None, - content_policy_fallbacks: Optional[List] = None, - regular_fallbacks: Optional[List] = None, - ): - """ - 1. raise an exception for ContextWindowExceededError if context_window_fallbacks is not None - 2. raise an exception for ContentPolicyViolationError if content_policy_fallbacks is not None - - 2. 
raise an exception for RateLimitError if - - there are no fallbacks - - there are no healthy deployments in the same model group - """ - _num_healthy_deployments = 0 - if healthy_deployments is not None and isinstance(healthy_deployments, list): - _num_healthy_deployments = len(healthy_deployments) - _num_all_deployments = 0 - if all_deployments is not None and isinstance(all_deployments, list): - _num_all_deployments = len(all_deployments) - - ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR / CONTENT POLICY VIOLATION ERROR w/ fallbacks available / Bad Request Error - if ( - isinstance(error, litellm.ContextWindowExceededError) - and context_window_fallbacks is not None - ): - raise error - - if ( - isinstance(error, litellm.ContentPolicyViolationError) - and content_policy_fallbacks is not None + self.model_group_retry_policy is not None + and model_group is not None + and model_group in self.model_group_retry_policy ): - raise error - - if isinstance(error, litellm.NotFoundError): - raise error - # Error we should only retry if there are other deployments - if isinstance(error, openai.RateLimitError): - if ( - _num_healthy_deployments <= 0 # if no healthy deployments - and regular_fallbacks is not None # and fallbacks available - and len(regular_fallbacks) > 0 - ): - raise error # then raise the error - - if isinstance(error, openai.AuthenticationError): - """ - - if other deployments available -> retry - - else -> raise error - """ - if ( - _num_all_deployments <= 1 - ): # if there is only 1 deployment for this model group then don't retry - raise error # then raise error - - # Do not retry if there are no healthy deployments - # just raise the error - if _num_healthy_deployments <= 0: # if no healthy deployments - raise error + retry_policy = self.model_group_retry_policy.get(model_group, None) # type: ignore + if isinstance(retry_policy, dict): + retry_policy = RetryPolicy(**retry_policy) - return True + return await run_async_with_retries( + original_function=original_function, + num_retries=num_retries, + retry_after=self.retry_after, + retry_policy=retry_policy, + fallbacks=self.fallbacks or [], + context_window_fallbacks=self.context_window_fallbacks or [], + content_policy_fallbacks=self.content_policy_fallbacks or [], + get_healthy_deployments=self._async_get_healthy_deployments, + log_retry=self.log_retry, + model_list=self.get_model_list(), + *args, + **kwargs, + ) def function_with_fallbacks(self, *args, **kwargs): """ @@ -3202,9 +3031,7 @@ def function_with_retries(self, *args, **kwargs): try: # if the function call is successful, no exception will be raised and we'll break out of the loop - self._handle_mock_testing_rate_limit_error( - kwargs=kwargs, model_group=model_group - ) + handle_mock_testing_rate_limit_error(kwargs=kwargs, model_group=model_group) response = original_function(*args, **kwargs) return response except Exception as e: @@ -3222,7 +3049,7 @@ def function_with_retries(self, *args, **kwargs): ) # raises an exception if this error should not be retries - self.should_retry_this_error( + should_retry_this_error( error=e, healthy_deployments=_healthy_deployments, all_deployments=_all_deployments, @@ -4468,18 +4295,6 @@ async def get_model_group_usage( rpm_usage += t return tpm_usage, rpm_usage - async def set_response_headers( - self, response: Any, model_group: Optional[str] = None - ) -> Any: - """ - Add the most accurate rate limit headers for a given model response. 
- - ## TODO: add model group rate limit headers - # - if healthy_deployments > 1, return model group rate limit headers - # - else return the model's rate limit headers - """ - return response - def get_model_ids(self, model_name: Optional[str] = None) -> List[str]: """ if 'model_name' is none, returns all. diff --git a/litellm/router_utils/retry_utils.py b/litellm/router_utils/retry_utils.py new file mode 100644 index 000000000000..e9396484ee5b --- /dev/null +++ b/litellm/router_utils/retry_utils.py @@ -0,0 +1,335 @@ +import asyncio +import inspect +from typing import Any, Callable, Dict, List, Optional, Union + +import httpx +import openai + +import litellm +from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs +from litellm.types.router import DeploymentTypedDict, RetryPolicy + + +async def run_async_with_retries( + original_function: Callable, + num_retries: int, + retry_after: int, # min time to wait before retrying a failed request + retry_policy: Optional[ + Union[RetryPolicy, Dict] + ], # set custom retries for different exceptions + fallbacks: List, + context_window_fallbacks: List, + content_policy_fallbacks: List, + get_healthy_deployments: Callable, + log_retry: Callable, + model_list: Optional[List[DeploymentTypedDict]], + *args, + **kwargs, +): # noqa: PLR0915 + parent_otel_span = _get_parent_otel_span_from_kwargs(kwargs) + + ## ADD MODEL GROUP SIZE TO METADATA - used for model_group_rate_limit_error tracking + model_group: Optional[str] = kwargs.get("model") + _metadata: dict = kwargs.get("metadata") or {} + if "model_group" in _metadata and isinstance(_metadata["model_group"], str): + if model_list is not None: + _metadata.update({"model_group_size": len(model_list)}) + + # TODO: Figure out logging - take a logger arg? 
+ # verbose_router_logger.debug( + # f"async function w/ retries: original_function - {original_function}, num_retries - {num_retries}" + # ) + try: + handle_mock_testing_rate_limit_error(model_group=model_group, kwargs=kwargs) + # if the function call is successful, no exception will be raised and we'll break out of the loop + response = await _make_call(original_function, *args, **kwargs) + + return response + except Exception as e: + current_attempt = None + original_exception = e + + """ + Retry Logic + """ + _healthy_deployments, _all_deployments = await get_healthy_deployments( + model=kwargs.get("model") or "", + parent_otel_span=parent_otel_span, + ) + + # raises an exception if this error should not be retried + should_retry_this_error( + error=e, + healthy_deployments=_healthy_deployments, + all_deployments=_all_deployments, + context_window_fallbacks=context_window_fallbacks, + regular_fallbacks=fallbacks, + content_policy_fallbacks=content_policy_fallbacks, + ) + + if retry_policy is not None: + # get num_retries from retry policy + _retry_policy_retries = _get_num_retries_from_retry_policy( + retry_policy=retry_policy, + exception=original_exception, + ) + if _retry_policy_retries is not None: + num_retries = _retry_policy_retries + ## LOGGING + if num_retries > 0: + kwargs = log_retry(kwargs=kwargs, e=original_exception) + else: + raise + + # decides how long to sleep before retry + sleep_time: Union[int, float] = _time_to_sleep_before_retry( + e=original_exception, + remaining_retries=num_retries, + num_retries=num_retries, + retry_after=retry_after, + healthy_deployments=_healthy_deployments, + ) + retry_after = int(sleep_time) + + await asyncio.sleep(sleep_time) + for current_attempt in range(num_retries): + try: + # if the function call is successful, no exception will be raised and we'll break out of the loop + response = await _make_call(original_function, *args, **kwargs) + if inspect.iscoroutinefunction( + response + ): # async errors are often returned as coroutines + response = await response + return response + + except Exception as e: + ## LOGGING + kwargs = log_retry(kwargs=kwargs, e=e) + remaining_retries = num_retries - current_attempt + _model: Optional[str] = kwargs.get("model") # type: ignore + if _model is not None: + _healthy_deployments, _ = await get_healthy_deployments( + model=_model, + parent_otel_span=parent_otel_span, + ) + else: + _healthy_deployments = [] + _timeout = _time_to_sleep_before_retry( + e=original_exception, + remaining_retries=remaining_retries, + num_retries=num_retries, + retry_after=retry_after, + healthy_deployments=_healthy_deployments, + ) + await asyncio.sleep(_timeout) + + if type(original_exception) in litellm.LITELLM_EXCEPTION_TYPES: + setattr(original_exception, "max_retries", num_retries) + setattr(original_exception, "num_retries", current_attempt) + + raise original_exception + + +def should_retry_this_error( + error: Exception, + healthy_deployments: Optional[List] = None, + all_deployments: Optional[List] = None, + context_window_fallbacks: Optional[List] = None, + content_policy_fallbacks: Optional[List] = None, + regular_fallbacks: Optional[List] = None, +): + """ + 1. raise an exception for ContextWindowExceededError if context_window_fallbacks is not None + 2. raise an exception for ContentPolicyViolationError if content_policy_fallbacks is not None + + 2. 
raise an exception for RateLimitError if + - there are no fallbacks + - there are no healthy deployments in the same model group + """ + _num_healthy_deployments = 0 + if healthy_deployments is not None and isinstance(healthy_deployments, list): + _num_healthy_deployments = len(healthy_deployments) + _num_all_deployments = 0 + if all_deployments is not None and isinstance(all_deployments, list): + _num_all_deployments = len(all_deployments) + + ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR / CONTENT POLICY VIOLATION ERROR w/ fallbacks available / Bad Request Error + if ( + isinstance(error, litellm.ContextWindowExceededError) + and context_window_fallbacks is not None + ): + raise error + + if ( + isinstance(error, litellm.ContentPolicyViolationError) + and content_policy_fallbacks is not None + ): + raise error + + if isinstance(error, litellm.NotFoundError): + raise error + # Error we should only retry if there are other deployments + if isinstance(error, openai.RateLimitError): + if ( + _num_healthy_deployments <= 0 # if no healthy deployments + and regular_fallbacks is not None # and fallbacks available + and len(regular_fallbacks) > 0 + ): + raise error # then raise the error + + if isinstance(error, openai.AuthenticationError): + """ + - if other deployments available -> retry + - else -> raise error + """ + if ( + _num_all_deployments <= 1 + ): # if there is only 1 deployment for this model group then don't retry + raise error # then raise error + + # Do not retry if there are no healthy deployments + # just raise the error + if _num_healthy_deployments <= 0: # if no healthy deployments + raise error + + return True + + +def handle_mock_testing_rate_limit_error( + kwargs: dict, model_group: Optional[str] = None +): + """ + Helper function to raise a mock litellm.RateLimitError error for testing purposes. + + Raises: + litellm.RateLimitError error when `mock_testing_rate_limit_error=True` passed in request params + """ + mock_testing_rate_limit_error: Optional[bool] = kwargs.pop( + "mock_testing_rate_limit_error", None + ) + if ( + mock_testing_rate_limit_error is not None + and mock_testing_rate_limit_error is True + ): + # TODO: Figure out logging - take a logger arg? + # verbose_router_logger.info( + # f"litellm.router.py::_mock_rate_limit_error() - Raising mock RateLimitError for model={model_group}" + # ) + raise litellm.RateLimitError( + model=model_group, + llm_provider="", + message=f"This is a mock exception for model={model_group}, to trigger a rate limit error.", + ) + + +async def _make_call(original_function: Any, *args, **kwargs): + """ + Handler for making a call to the .completion()/.embeddings()/etc. functions. + """ + model_group = kwargs.get("model") + response = original_function(*args, **kwargs) + if inspect.iscoroutinefunction(response) or inspect.isawaitable(response): + response = await response + ## PROCESS RESPONSE HEADERS + await _set_response_headers(response=response, model_group=model_group) + + return response + + +async def _set_response_headers( + response: Any, model_group: Optional[str] = None +) -> Any: + """ + Add the most accurate rate limit headers for a given model response. 
+ + ## TODO: add model group rate limit headers + # - if healthy_deployments > 1, return model group rate limit headers + # - else return the model's rate limit headers + """ + return response + + +def _get_num_retries_from_retry_policy( + retry_policy: Union[RetryPolicy, Dict], + exception: Exception, +): + """ + BadRequestErrorRetries: Optional[int] = None + AuthenticationErrorRetries: Optional[int] = None + TimeoutErrorRetries: Optional[int] = None + RateLimitErrorRetries: Optional[int] = None + ContentPolicyViolationErrorRetries: Optional[int] = None + """ + if isinstance(retry_policy, dict): + retry_policy = RetryPolicy(**retry_policy) + + if ( + isinstance(exception, litellm.BadRequestError) + and retry_policy.BadRequestErrorRetries is not None + ): + return retry_policy.BadRequestErrorRetries + if ( + isinstance(exception, litellm.AuthenticationError) + and retry_policy.AuthenticationErrorRetries is not None + ): + return retry_policy.AuthenticationErrorRetries + if ( + isinstance(exception, litellm.Timeout) + and retry_policy.TimeoutErrorRetries is not None + ): + return retry_policy.TimeoutErrorRetries + if ( + isinstance(exception, litellm.RateLimitError) + and retry_policy.RateLimitErrorRetries is not None + ): + return retry_policy.RateLimitErrorRetries + if ( + isinstance(exception, litellm.ContentPolicyViolationError) + and retry_policy.ContentPolicyViolationErrorRetries is not None + ): + return retry_policy.ContentPolicyViolationErrorRetries + + +def _time_to_sleep_before_retry( + e: Exception, + remaining_retries: int, + num_retries: int, + retry_after: int, + healthy_deployments: Optional[List] = None, +) -> Union[int, float]: + """ + Calculate back-off, then retry + + It should instantly retry only when: + 1. there are healthy deployments in the same model group + 2. 
there are fallbacks for the completion call + """ + if ( + healthy_deployments is not None + and isinstance(healthy_deployments, list) + and len(healthy_deployments) > 1 + ): + return 0 + + response_headers: Optional[httpx.Headers] = None + if hasattr(e, "response") and hasattr(e.response, "headers"): # type: ignore + response_headers = e.response.headers # type: ignore + if hasattr(e, "litellm_response_headers"): + response_headers = e.litellm_response_headers # type: ignore + + if response_headers is not None: + timeout = litellm._calculate_retry_after( + remaining_retries=remaining_retries, + max_retries=num_retries, + response_headers=response_headers, + min_timeout=retry_after, + ) + + else: + timeout = litellm._calculate_retry_after( + remaining_retries=remaining_retries, + max_retries=num_retries, + min_timeout=retry_after, + ) + + return timeout From ea530ee67f1ca645224526c8f2ef1c711d5ad47c Mon Sep 17 00:00:00 2001 From: dbczumar Date: Mon, 25 Nov 2024 20:44:48 -0800 Subject: [PATCH 02/27] fix Signed-off-by: dbczumar --- litellm/router.py | 7 ------- litellm/router_utils/retry_utils.py | 5 ++--- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index 14ccc1bc2e0c..125b5912728f 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2876,13 +2876,6 @@ async def async_function_with_retries(self, *args, **kwargs): # noqa: PLR0915 model_group: Optional[str] = kwargs.get("model") num_retries = kwargs.pop("num_retries") - ## ADD MODEL GROUP SIZE TO METADATA - used for model_group_rate_limit_error tracking - _metadata: dict = kwargs.get("metadata") or {} - if "model_group" in _metadata and isinstance(_metadata["model_group"], str): - model_list = self.get_model_list(model_name=_metadata["model_group"]) - if model_list is not None: - _metadata.update({"model_group_size": len(model_list)}) - retry_policy = self.retry_policy if ( self.model_group_retry_policy is not None diff --git a/litellm/router_utils/retry_utils.py b/litellm/router_utils/retry_utils.py index e9396484ee5b..36bf6d12345a 100644 --- a/litellm/router_utils/retry_utils.py +++ b/litellm/router_utils/retry_utils.py @@ -82,16 +82,15 @@ async def run_async_with_retries( raise # decides how long to sleep before retry - sleep_time: Union[int, float] = _time_to_sleep_before_retry( + retry_after = _time_to_sleep_before_retry( # type: ignore e=original_exception, remaining_retries=num_retries, num_retries=num_retries, retry_after=retry_after, healthy_deployments=_healthy_deployments, ) - retry_after = int(sleep_time) - await asyncio.sleep(sleep_time) + await asyncio.sleep(retry_after) for current_attempt in range(num_retries): try: # if the function call is successful, no exception will be raised and we'll break out of the loop From a5b75eb5e2a77533e050e5ed0ba18e21a84aee28 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Mon, 25 Nov 2024 20:51:30 -0800 Subject: [PATCH 03/27] fix Signed-off-by: dbczumar --- litellm/router.py | 102 ++-------------------------- litellm/router_utils/retry_utils.py | 6 +- 2 files changed, 8 insertions(+), 100 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index 125b5912728f..95b93db6cde5 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -90,6 +90,7 @@ handle_mock_testing_rate_limit_error, run_async_with_retries, should_retry_this_error, + time_to_sleep_before_retry, ) from litellm.router_utils.router_callbacks.track_deployment_metrics import ( increment_deployment_failures_for_current_minute, @@ -2960,50 +2961,6 @@ def 
_get_fallback_model_group_from_fallbacks( break return fallback_model_group - def _time_to_sleep_before_retry( - self, - e: Exception, - remaining_retries: int, - num_retries: int, - healthy_deployments: Optional[List] = None, - ) -> Union[int, float]: - """ - Calculate back-off, then retry - - It should instantly retry only when: - 1. there are healthy deployments in the same model group - 2. there are fallbacks for the completion call - """ - if ( - healthy_deployments is not None - and isinstance(healthy_deployments, list) - and len(healthy_deployments) > 1 - ): - return 0 - - response_headers: Optional[httpx.Headers] = None - if hasattr(e, "response") and hasattr(e.response, "headers"): # type: ignore - response_headers = e.response.headers # type: ignore - if hasattr(e, "litellm_response_headers"): - response_headers = e.litellm_response_headers # type: ignore - - if response_headers is not None: - timeout = litellm._calculate_retry_after( - remaining_retries=remaining_retries, - max_retries=num_retries, - response_headers=response_headers, - min_timeout=self.retry_after, - ) - - else: - timeout = litellm._calculate_retry_after( - remaining_retries=remaining_retries, - max_retries=num_retries, - min_timeout=self.retry_after, - ) - - return timeout - def function_with_retries(self, *args, **kwargs): """ Try calling the model 3 times. Shuffle-between available deployments. @@ -3052,10 +3009,11 @@ def function_with_retries(self, *args, **kwargs): ) # decides how long to sleep before retry - _timeout = self._time_to_sleep_before_retry( + _timeout = time_to_sleep_before_retry( e=original_exception, remaining_retries=num_retries, num_retries=num_retries, + retry_after=self.retry_after, healthy_deployments=_healthy_deployments, ) @@ -3086,10 +3044,11 @@ def function_with_retries(self, *args, **kwargs): parent_otel_span=parent_otel_span, ) remaining_retries = num_retries - current_attempt - _timeout = self._time_to_sleep_before_retry( + _timeout = time_to_sleep_before_retry( e=e, remaining_retries=remaining_retries, num_retries=num_retries, + retry_after=self.retry_after, healthy_deployments=_healthy_deployments, ) time.sleep(_timeout) @@ -5211,57 +5170,6 @@ def _track_deployment_metrics( except Exception as e: verbose_router_logger.error(f"Error in _track_deployment_metrics: {str(e)}") - def get_num_retries_from_retry_policy( - self, exception: Exception, model_group: Optional[str] = None - ): - """ - BadRequestErrorRetries: Optional[int] = None - AuthenticationErrorRetries: Optional[int] = None - TimeoutErrorRetries: Optional[int] = None - RateLimitErrorRetries: Optional[int] = None - ContentPolicyViolationErrorRetries: Optional[int] = None - """ - # if we can find the exception then in the retry policy -> return the number of retries - retry_policy: Optional[RetryPolicy] = self.retry_policy - - if ( - self.model_group_retry_policy is not None - and model_group is not None - and model_group in self.model_group_retry_policy - ): - retry_policy = self.model_group_retry_policy.get(model_group, None) # type: ignore - - if retry_policy is None: - return None - if isinstance(retry_policy, dict): - retry_policy = RetryPolicy(**retry_policy) - - if ( - isinstance(exception, litellm.BadRequestError) - and retry_policy.BadRequestErrorRetries is not None - ): - return retry_policy.BadRequestErrorRetries - if ( - isinstance(exception, litellm.AuthenticationError) - and retry_policy.AuthenticationErrorRetries is not None - ): - return retry_policy.AuthenticationErrorRetries - if ( - 
isinstance(exception, litellm.Timeout) - and retry_policy.TimeoutErrorRetries is not None - ): - return retry_policy.TimeoutErrorRetries - if ( - isinstance(exception, litellm.RateLimitError) - and retry_policy.RateLimitErrorRetries is not None - ): - return retry_policy.RateLimitErrorRetries - if ( - isinstance(exception, litellm.ContentPolicyViolationError) - and retry_policy.ContentPolicyViolationErrorRetries is not None - ): - return retry_policy.ContentPolicyViolationErrorRetries - def get_allowed_fails_from_policy(self, exception: Exception): """ BadRequestErrorRetries: Optional[int] = None diff --git a/litellm/router_utils/retry_utils.py b/litellm/router_utils/retry_utils.py index 36bf6d12345a..f74660fa1f91 100644 --- a/litellm/router_utils/retry_utils.py +++ b/litellm/router_utils/retry_utils.py @@ -82,7 +82,7 @@ async def run_async_with_retries( raise # decides how long to sleep before retry - retry_after = _time_to_sleep_before_retry( # type: ignore + retry_after = time_to_sleep_before_retry( # type: ignore e=original_exception, remaining_retries=num_retries, num_retries=num_retries, @@ -113,7 +113,7 @@ async def run_async_with_retries( ) else: _healthy_deployments = [] - _timeout = _time_to_sleep_before_retry( + _timeout = time_to_sleep_before_retry( e=original_exception, remaining_retries=remaining_retries, num_retries=num_retries, @@ -289,7 +289,7 @@ def _get_num_retries_from_retry_policy( return retry_policy.ContentPolicyViolationErrorRetries -def _time_to_sleep_before_retry( +def time_to_sleep_before_retry( e: Exception, remaining_retries: int, num_retries: int, From 508b83bfce7ae04243b2d6d9640ddb6917e7a160 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Mon, 25 Nov 2024 23:16:35 -0800 Subject: [PATCH 04/27] Progress Signed-off-by: dbczumar --- litellm/utils.py | 326 +++++++++++++++++++++-------------------------- 1 file changed, 146 insertions(+), 180 deletions(-) diff --git a/litellm/utils.py b/litellm/utils.py index 262af341817b..6068332473a8 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -97,6 +97,7 @@ from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper from litellm.litellm_core_utils.token_counter import get_modified_max_tokens from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler +from litellm.router_utils.retry_utils import run_async_with_retries from litellm.secret_managers.main import get_secret from litellm.types.llms.openai import ( AllMessageValues, @@ -960,7 +961,44 @@ def wrapper(*args, **kwargs): # noqa: PLR0915 raise e @wraps(original_function) - async def wrapper_async(*args, **kwargs): # noqa: PLR0915 + async def wrapper_async(*args, **kwargs): + model = "" + try: + model = args[0] if len(args) > 0 else kwargs["model"] + except Exception: + call_type = original_function.__name__ + if ( + call_type != CallTypes.aimage_generation.value # model optional + and call_type != CallTypes.atext_completion.value # can also be engine + and call_type != CallTypes.amoderation.value + ): + raise ValueError("model param not passed in.") + + async def get_healthy_deployments(*args, **kwargs): + return [], [] + + return await run_async_with_retries( + original_function=original_function, + num_retries=kwargs.get("num_retries", 0) or litellm.num_retries or 0, + retry_after=0, + retry_policy=kwargs.get("retry_policy"), + fallbacks=kwargs.get("fallbacks", []), + context_window_fallbacks=kwargs.get("context_window_fallback_dict", {}).get( + model, [] + ), + content_policy_fallbacks=[], + # TODO: Explain + 
get_healthy_deployments=get_healthy_deployments, + # TODO: Explain + log_retry=lambda kwargs, _: kwargs, + model_list=[], + *args, + **kwargs, + ) + + async def _wrapper_async( + original_function, model, *args, **kwargs + ): # noqa: PLR0915 print_args_passed_to_litellm(original_function, args, kwargs) start_time = datetime.datetime.now() result = None @@ -977,202 +1015,130 @@ async def wrapper_async(*args, **kwargs): # noqa: PLR0915 if "litellm_call_id" not in kwargs: kwargs["litellm_call_id"] = str(uuid.uuid4()) - model = "" - try: - model = args[0] if len(args) > 0 else kwargs["model"] - except Exception: - if ( - call_type != CallTypes.aimage_generation.value # model optional - and call_type != CallTypes.atext_completion.value # can also be engine - and call_type != CallTypes.amoderation.value - ): - raise ValueError("model param not passed in.") - - try: - if logging_obj is None: - logging_obj, kwargs = function_setup( - original_function.__name__, rules_obj, start_time, *args, **kwargs - ) - kwargs["litellm_logging_obj"] = logging_obj - logging_obj._llm_caching_handler = _llm_caching_handler - # [OPTIONAL] CHECK BUDGET - if litellm.max_budget: - if litellm._current_cost > litellm.max_budget: - raise BudgetExceededError( - current_cost=litellm._current_cost, - max_budget=litellm.max_budget, - ) - - # [OPTIONAL] CHECK CACHE - print_verbose( - f"ASYNC kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache'): {kwargs.get('cache', None)}" + if logging_obj is None: + logging_obj, kwargs = function_setup( + original_function.__name__, rules_obj, start_time, *args, **kwargs ) - _caching_handler_response: CachingHandlerResponse = ( - await _llm_caching_handler._async_get_cache( - model=model, - original_function=original_function, - logging_obj=logging_obj, - start_time=start_time, - call_type=call_type, - kwargs=kwargs, - args=args, + kwargs["litellm_logging_obj"] = logging_obj + logging_obj._llm_caching_handler = _llm_caching_handler + # [OPTIONAL] CHECK BUDGET + if litellm.max_budget: + if litellm._current_cost > litellm.max_budget: + raise BudgetExceededError( + current_cost=litellm._current_cost, + max_budget=litellm.max_budget, ) - ) - if ( - _caching_handler_response.cached_result is not None - and _caching_handler_response.final_embedding_cached_response is None - ): - return _caching_handler_response.cached_result - - elif _caching_handler_response.embedding_all_elements_cache_hit is True: - return _caching_handler_response.final_embedding_cached_response - # MODEL CALL - result = await original_function(*args, **kwargs) - end_time = datetime.datetime.now() - if "stream" in kwargs and kwargs["stream"] is True: - if ( - "complete_response" in kwargs - and kwargs["complete_response"] is True - ): - chunks = [] - for idx, chunk in enumerate(result): - chunks.append(chunk) - return litellm.stream_chunk_builder( - chunks, messages=kwargs.get("messages", None) - ) - else: - return result - elif call_type == CallTypes.arealtime.value: - return result - - # ADD HIDDEN PARAMS - additional call metadata - if hasattr(result, "_hidden_params"): - result._hidden_params["litellm_call_id"] = getattr( - logging_obj, "litellm_call_id", None - ) - result._hidden_params["model_id"] = kwargs.get("model_info", {}).get( - "id", None - ) - result._hidden_params["api_base"] = get_api_base( - model=model, - optional_params=kwargs, - ) - result._hidden_params["response_cost"] = ( - logging_obj._response_cost_calculator(result=result) - ) - 
result._hidden_params["additional_headers"] = process_response_headers( - result._hidden_params.get("additional_headers") or {} - ) # GUARANTEE OPENAI HEADERS IN RESPONSE - if ( - isinstance(result, ModelResponse) - or isinstance(result, EmbeddingResponse) - or isinstance(result, TranscriptionResponse) - ): - setattr( - result, - "_response_ms", - (end_time - start_time).total_seconds() * 1000, - ) # return response latency in ms like openai - - ### POST-CALL RULES ### - post_call_processing( - original_response=result, model=model, optional_params=kwargs - ) - - ## Add response to cache - await _llm_caching_handler.async_set_cache( - result=result, + # [OPTIONAL] CHECK CACHE + print_verbose( + f"ASYNC kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache'): {kwargs.get('cache', None)}" + ) + _caching_handler_response: CachingHandlerResponse = ( + await _llm_caching_handler._async_get_cache( + model=model, original_function=original_function, + logging_obj=logging_obj, + start_time=start_time, + call_type=call_type, kwargs=kwargs, args=args, ) + ) + if ( + _caching_handler_response.cached_result is not None + and _caching_handler_response.final_embedding_cached_response is None + ): + return _caching_handler_response.cached_result + + elif _caching_handler_response.embedding_all_elements_cache_hit is True: + return _caching_handler_response.final_embedding_cached_response + + # MODEL CALL + result = await original_function(*args, **kwargs) + end_time = datetime.datetime.now() + if "stream" in kwargs and kwargs["stream"] is True: + if "complete_response" in kwargs and kwargs["complete_response"] is True: + chunks = [] + for idx, chunk in enumerate(result): + chunks.append(chunk) + return litellm.stream_chunk_builder( + chunks, messages=kwargs.get("messages", None) + ) + else: + return result + elif call_type == CallTypes.arealtime.value: + return result - # LOG SUCCESS - handle streaming success logging in the _next_ object - print_verbose( - f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}" + # ADD HIDDEN PARAMS - additional call metadata + if hasattr(result, "_hidden_params"): + result._hidden_params["litellm_call_id"] = getattr( + logging_obj, "litellm_call_id", None ) - # check if user does not want this to be logged - asyncio.create_task( - logging_obj.async_success_handler(result, start_time, end_time) + result._hidden_params["model_id"] = kwargs.get("model_info", {}).get( + "id", None ) - threading.Thread( - target=logging_obj.success_handler, - args=(result, start_time, end_time), - ).start() + result._hidden_params["api_base"] = get_api_base( + model=model, + optional_params=kwargs, + ) + result._hidden_params["response_cost"] = ( + logging_obj._response_cost_calculator(result=result) + ) + result._hidden_params["additional_headers"] = process_response_headers( + result._hidden_params.get("additional_headers") or {} + ) # GUARANTEE OPENAI HEADERS IN RESPONSE + if ( + isinstance(result, ModelResponse) + or isinstance(result, EmbeddingResponse) + or isinstance(result, TranscriptionResponse) + ): + setattr( + result, + "_response_ms", + (end_time - start_time).total_seconds() * 1000, + ) # return response latency in ms like openai - # REBUILD EMBEDDING CACHING - if ( - isinstance(result, EmbeddingResponse) - and _caching_handler_response.final_embedding_cached_response - is not None - ): - return _llm_caching_handler._combine_cached_embedding_response_with_api_result( + ### POST-CALL RULES 
### + post_call_processing( + original_response=result, model=model, optional_params=kwargs + ) + + ## Add response to cache + await _llm_caching_handler.async_set_cache( + result=result, + original_function=original_function, + kwargs=kwargs, + args=args, + ) + + # LOG SUCCESS - handle streaming success logging in the _next_ object + print_verbose( + f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}" + ) + # check if user does not want this to be logged + asyncio.create_task( + logging_obj.async_success_handler(result, start_time, end_time) + ) + threading.Thread( + target=logging_obj.success_handler, + args=(result, start_time, end_time), + ).start() + + # REBUILD EMBEDDING CACHING + if ( + isinstance(result, EmbeddingResponse) + and _caching_handler_response.final_embedding_cached_response is not None + ): + return ( + _llm_caching_handler._combine_cached_embedding_response_with_api_result( _caching_handler_response=_caching_handler_response, embedding_response=result, start_time=start_time, end_time=end_time, ) + ) - return result - except Exception as e: - traceback_exception = traceback.format_exc() - end_time = datetime.datetime.now() - if logging_obj: - try: - logging_obj.failure_handler( - e, traceback_exception, start_time, end_time - ) # DO NOT MAKE THREADED - router retry fallback relies on this! - except Exception as e: - raise e - try: - await logging_obj.async_failure_handler( - e, traceback_exception, start_time, end_time - ) - except Exception as e: - raise e - - call_type = original_function.__name__ - if call_type == CallTypes.acompletion.value: - num_retries = ( - kwargs.get("num_retries", None) or litellm.num_retries or None - ) - litellm.num_retries = ( - None # set retries to None to prevent infinite loops - ) - context_window_fallback_dict = kwargs.get( - "context_window_fallback_dict", {} - ) - - _is_litellm_router_call = "model_group" in kwargs.get( - "metadata", {} - ) # check if call from litellm.router/proxy - if ( - num_retries and not _is_litellm_router_call - ): # only enter this if call is not from litellm router/proxy. 
router has it's own logic for retrying - try: - kwargs["num_retries"] = num_retries - kwargs["original_function"] = original_function - if isinstance( - e, openai.RateLimitError - ): # rate limiting specific error - kwargs["retry_strategy"] = "exponential_backoff_retry" - elif isinstance(e, openai.APIError): # generic api error - kwargs["retry_strategy"] = "constant_retry" - return await litellm.acompletion_with_retries(*args, **kwargs) - except Exception: - pass - elif ( - isinstance(e, litellm.exceptions.ContextWindowExceededError) - and context_window_fallback_dict - and model in context_window_fallback_dict - ): - if len(args) > 0: - args[0] = context_window_fallback_dict[model] # type: ignore - else: - kwargs["model"] = context_window_fallback_dict[model] - return await original_function(*args, **kwargs) - raise e + return result is_coroutine = inspect.iscoroutinefunction(original_function) From ddeb654ca6270e144e6380d1621c5c41cc69aa47 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Tue, 26 Nov 2024 00:20:45 -0800 Subject: [PATCH 05/27] fix Signed-off-by: dbczumar --- litellm/router_utils/retry_utils.py | 45 +++++++++++++++++++++ litellm/utils.py | 63 +++++++++++++++++++++-------- 2 files changed, 91 insertions(+), 17 deletions(-) diff --git a/litellm/router_utils/retry_utils.py b/litellm/router_utils/retry_utils.py index f74660fa1f91..f079c7b9dde3 100644 --- a/litellm/router_utils/retry_utils.py +++ b/litellm/router_utils/retry_utils.py @@ -10,6 +10,51 @@ from litellm.types.router import DeploymentTypedDict, RetryPolicy +def run_with_retries( + original_function: Callable, + num_retries: int, + retry_after: int, # min time to wait before retrying a failed request + retry_policy: Optional[ + Union[RetryPolicy, Dict] + ], # set custom retries for different exceptions + fallbacks: List, + context_window_fallbacks: List, + content_policy_fallbacks: List, + get_healthy_deployments: Callable, + log_retry: Callable, + model_list: Optional[List[DeploymentTypedDict]], + *args, + **kwargs, +): + async_run_with_retries = run_async_with_retries( + original_function=original_function, + num_retries=num_retries, + retry_after=retry_after, + retry_policy=retry_policy, + fallbacks=fallbacks, + context_window_fallbacks=context_window_fallbacks, + content_policy_fallbacks=content_policy_fallbacks, + get_healthy_deployments=get_healthy_deployments, + log_retry=log_retry, + model_list=model_list, + *args, + **kwargs, + ) + try: + # Check if an event loop is already running + loop = asyncio.get_running_loop() + # If running in an async context, return the coroutine for awaiting + return async_run_with_retries + except RuntimeError: + # If no event loop is running, create a new one and run the task + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + return loop.run_until_complete(async_run_with_retries) + finally: + loop.close() + + async def run_async_with_retries( original_function: Callable, num_retries: int, diff --git a/litellm/utils.py b/litellm/utils.py index 6068332473a8..6cd213660a9d 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -97,7 +97,7 @@ from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper from litellm.litellm_core_utils.token_counter import get_modified_max_tokens from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler -from litellm.router_utils.retry_utils import run_async_with_retries +from litellm.router_utils.retry_utils import run_async_with_retries, run_with_retries from litellm.secret_managers.main import 
get_secret from litellm.types.llms.openai import ( AllMessageValues, @@ -673,7 +673,44 @@ def post_call_processing(original_response, model, optional_params: Optional[dic raise e @wraps(original_function) - def wrapper(*args, **kwargs): # noqa: PLR0915 + def wrapper(*args, **kwargs): + model: Optional[str] = None + try: + model = args[0] if len(args) > 0 else kwargs.pop("model") + except Exception: + model = None + call_type = original_function.__name__ + if ( + call_type != CallTypes.image_generation.value + and call_type != CallTypes.text_completion.value + ): + raise ValueError("model param not passed in.") + + async def get_healthy_deployments(*args, **kwargs): + return [], [] + + return run_with_retries( + original_function=lambda *args, **kwargs: _wrapper( + original_function, model, *args, **kwargs + ), + num_retries=kwargs.get("num_retries", 0) or litellm.num_retries or 0, + retry_after=0, + retry_policy=kwargs.get("retry_policy"), + fallbacks=kwargs.get("fallbacks", []), + context_window_fallbacks=kwargs.get("context_window_fallback_dict", {}).get( + model, [] + ), + content_policy_fallbacks=[], + # TODO: Explain + get_healthy_deployments=get_healthy_deployments, + # TODO: Explain + log_retry=lambda kwargs, _: kwargs, + model_list=[], + *args, + **kwargs, + ) + + def _wrapper(original_function, model, *args, **kwargs): # noqa: PLR0915 # DO NOT MOVE THIS. It always needs to run first # Check if this is an async function. If so only execute the async function if ( @@ -727,17 +764,6 @@ def wrapper(*args, **kwargs): # noqa: PLR0915 if "litellm_call_id" not in kwargs: kwargs["litellm_call_id"] = str(uuid.uuid4()) - model: Optional[str] = None - try: - model = args[0] if len(args) > 0 else kwargs["model"] - except Exception: - model = None - if ( - call_type != CallTypes.image_generation.value - and call_type != CallTypes.text_completion.value - ): - raise ValueError("model param not passed in.") - try: if logging_obj is None: logging_obj, kwargs = function_setup( @@ -847,7 +873,7 @@ def wrapper(*args, **kwargs): # noqa: PLR0915 except Exception as e: print_verbose(f"Error while checking max token limit: {str(e)}") # MODEL CALL - result = original_function(*args, **kwargs) + result = original_function(*args, **{**kwargs, **{"model": model}}) end_time = datetime.datetime.now() if "stream" in kwargs and kwargs["stream"] is True: if ( @@ -964,7 +990,7 @@ def wrapper(*args, **kwargs): # noqa: PLR0915 async def wrapper_async(*args, **kwargs): model = "" try: - model = args[0] if len(args) > 0 else kwargs["model"] + model = args[0] if len(args) > 0 else kwargs.pop("model") except Exception: call_type = original_function.__name__ if ( @@ -978,7 +1004,9 @@ async def get_healthy_deployments(*args, **kwargs): return [], [] return await run_async_with_retries( - original_function=original_function, + original_function=lambda *args, **kwargs: _wrapper_async( + original_function, model, *args, **kwargs + ), num_retries=kwargs.get("num_retries", 0) or litellm.num_retries or 0, retry_after=0, retry_policy=kwargs.get("retry_policy"), @@ -1054,7 +1082,8 @@ async def _wrapper_async( return _caching_handler_response.final_embedding_cached_response # MODEL CALL - result = await original_function(*args, **kwargs) + # TODO: Clean this up! 
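        # (`model` may have been popped from kwargs by wrapper_async above, so it is re-injected here for the underlying call.)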
+ result = await original_function(*args, **{**kwargs, **{"model": model}}) end_time = datetime.datetime.now() if "stream" in kwargs and kwargs["stream"] is True: if "complete_response" in kwargs and kwargs["complete_response"] is True: From f37dc92d47ef3e7abf403f145e298572bbea0eb0 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Tue, 26 Nov 2024 00:50:13 -0800 Subject: [PATCH 06/27] fix Signed-off-by: dbczumar --- litellm/router.py | 8 +- litellm/router_utils/retry_utils.py | 57 +++-- litellm/utils.py | 368 ++++++++++++---------------- 3 files changed, 195 insertions(+), 238 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index 95b93db6cde5..34b74e0bf873 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -87,8 +87,8 @@ send_llm_exception_alert, ) from litellm.router_utils.retry_utils import ( + async_run_with_retries, handle_mock_testing_rate_limit_error, - run_async_with_retries, should_retry_this_error, time_to_sleep_before_retry, ) @@ -2887,8 +2887,10 @@ async def async_function_with_retries(self, *args, **kwargs): # noqa: PLR0915 if isinstance(retry_policy, dict): retry_policy = RetryPolicy(**retry_policy) - return await run_async_with_retries( + return await async_run_with_retries( original_function=original_function, + original_function_args=args, + original_function_kwargs=kwargs, num_retries=num_retries, retry_after=self.retry_after, retry_policy=retry_policy, @@ -2898,8 +2900,6 @@ async def async_function_with_retries(self, *args, **kwargs): # noqa: PLR0915 get_healthy_deployments=self._async_get_healthy_deployments, log_retry=self.log_retry, model_list=self.get_model_list(), - *args, - **kwargs, ) def function_with_fallbacks(self, *args, **kwargs): diff --git a/litellm/router_utils/retry_utils.py b/litellm/router_utils/retry_utils.py index f079c7b9dde3..70242f75487e 100644 --- a/litellm/router_utils/retry_utils.py +++ b/litellm/router_utils/retry_utils.py @@ -1,6 +1,6 @@ import asyncio import inspect -from typing import Any, Callable, Dict, List, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import httpx import openai @@ -12,6 +12,8 @@ def run_with_retries( original_function: Callable, + original_function_args: Tuple, + original_function_kwargs: Dict[str, Any], num_retries: int, retry_after: int, # min time to wait before retrying a failed request retry_policy: Optional[ @@ -23,11 +25,11 @@ def run_with_retries( get_healthy_deployments: Callable, log_retry: Callable, model_list: Optional[List[DeploymentTypedDict]], - *args, - **kwargs, ): - async_run_with_retries = run_async_with_retries( + async_task = async_run_with_retries( original_function=original_function, + original_function_args=original_function_args, + original_function_kwargs=original_function_kwargs, num_retries=num_retries, retry_after=retry_after, retry_policy=retry_policy, @@ -37,26 +39,26 @@ def run_with_retries( get_healthy_deployments=get_healthy_deployments, log_retry=log_retry, model_list=model_list, - *args, - **kwargs, ) try: # Check if an event loop is already running loop = asyncio.get_running_loop() # If running in an async context, return the coroutine for awaiting - return async_run_with_retries + return async_task except RuntimeError: # If no event loop is running, create a new one and run the task loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: - return loop.run_until_complete(async_run_with_retries) + return loop.run_until_complete(async_task) finally: loop.close() -async def run_async_with_retries( +async def 
async_run_with_retries( original_function: Callable, + original_function_args: Tuple, + original_function_kwargs: Dict[str, Any], num_retries: int, retry_after: int, # min time to wait before retrying a failed request retry_policy: Optional[ @@ -68,14 +70,12 @@ async def run_async_with_retries( get_healthy_deployments: Callable, log_retry: Callable, model_list: Optional[List[DeploymentTypedDict]], - *args, - **kwargs, ): # noqa: PLR0915 - parent_otel_span = _get_parent_otel_span_from_kwargs(kwargs) + parent_otel_span = _get_parent_otel_span_from_kwargs(original_function_kwargs) ## ADD MODEL GROUP SIZE TO METADATA - used for model_group_rate_limit_error tracking - model_group: Optional[str] = kwargs.get("model") - _metadata: dict = kwargs.get("metadata") or {} + model_group: Optional[str] = original_function_kwargs.get("model") + _metadata: dict = original_function_kwargs.get("metadata") or {} if "model_group" in _metadata and isinstance(_metadata["model_group"], str): if model_list is not None: _metadata.update({"model_group_size": len(model_list)}) @@ -85,10 +85,13 @@ async def run_async_with_retries( # f"async function w/ retries: original_function - {original_function}, num_retries - {num_retries}" # ) try: - handle_mock_testing_rate_limit_error(model_group=model_group, kwargs=kwargs) + handle_mock_testing_rate_limit_error( + model_group=model_group, kwargs=original_function_kwargs + ) # if the function call is successful, no exception will be raised and we'll break out of the loop - response = await _make_call(original_function, *args, **kwargs) - + response = await _make_call( + original_function, *original_function_args, **original_function_kwargs + ) return response except Exception as e: current_attempt = None @@ -98,7 +101,7 @@ async def run_async_with_retries( Retry Logic """ _healthy_deployments, _all_deployments = await get_healthy_deployments( - model=kwargs.get("model") or "", + model=original_function_kwargs.get("model") or "", parent_otel_span=parent_otel_span, ) @@ -122,7 +125,10 @@ async def run_async_with_retries( num_retries = _retry_policy_retries ## LOGGING if num_retries > 0: - kwargs = log_retry(kwargs=kwargs, e=original_exception) + original_function_kwargs = log_retry( + original_function_kwargs=original_function_kwargs, + e=original_exception, + ) else: raise @@ -139,7 +145,11 @@ async def run_async_with_retries( for current_attempt in range(num_retries): try: # if the function call is successful, no exception will be raised and we'll break out of the loop - response = await _make_call(original_function, *args, **kwargs) + response = await _make_call( + original_function, + *original_function_args, + **original_function_kwargs, + ) if inspect.iscoroutinefunction( response ): # async errors are often returned as coroutines @@ -148,9 +158,12 @@ async def run_async_with_retries( except Exception as e: ## LOGGING - kwargs = log_retry(kwargs=kwargs, e=e) + original_function_kwargs = log_retry( + kwargs=original_function_kwargs, + e=e, + ) remaining_retries = num_retries - current_attempt - _model: Optional[str] = kwargs.get("model") # type: ignore + _model: Optional[str] = original_function_kwargs.get("model") # type: ignore if _model is not None: _healthy_deployments, _ = await get_healthy_deployments( model=_model, diff --git a/litellm/utils.py b/litellm/utils.py index 6cd213660a9d..eac43307d887 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -97,7 +97,7 @@ from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper from 
litellm.litellm_core_utils.token_counter import get_modified_max_tokens from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler -from litellm.router_utils.retry_utils import run_async_with_retries, run_with_retries +from litellm.router_utils.retry_utils import async_run_with_retries, run_with_retries from litellm.secret_managers.main import get_secret from litellm.types.llms.openai import ( AllMessageValues, @@ -693,6 +693,8 @@ async def get_healthy_deployments(*args, **kwargs): original_function=lambda *args, **kwargs: _wrapper( original_function, model, *args, **kwargs ), + original_function_args=args, + original_function_kwargs=kwargs, num_retries=kwargs.get("num_retries", 0) or litellm.num_retries or 0, retry_after=0, retry_policy=kwargs.get("retry_policy"), @@ -706,8 +708,6 @@ async def get_healthy_deployments(*args, **kwargs): # TODO: Explain log_retry=lambda kwargs, _: kwargs, model_list=[], - *args, - **kwargs, ) def _wrapper(original_function, model, *args, **kwargs): # noqa: PLR0915 @@ -764,227 +764,171 @@ def _wrapper(original_function, model, *args, **kwargs): # noqa: PLR0915 if "litellm_call_id" not in kwargs: kwargs["litellm_call_id"] = str(uuid.uuid4()) - try: - if logging_obj is None: - logging_obj, kwargs = function_setup( - original_function.__name__, rules_obj, start_time, *args, **kwargs - ) - kwargs["litellm_logging_obj"] = logging_obj - _llm_caching_handler: LLMCachingHandler = LLMCachingHandler( - original_function=original_function, - request_kwargs=kwargs, - start_time=start_time, + if logging_obj is None: + logging_obj, kwargs = function_setup( + original_function.__name__, rules_obj, start_time, *args, **kwargs ) - logging_obj._llm_caching_handler = _llm_caching_handler - - # CHECK FOR 'os.environ/' in kwargs - for k, v in kwargs.items(): - if v is not None and isinstance(v, str) and v.startswith("os.environ/"): - kwargs[k] = litellm.get_secret(v) - # [OPTIONAL] CHECK BUDGET - if litellm.max_budget: - if litellm._current_cost > litellm.max_budget: - raise BudgetExceededError( - current_cost=litellm._current_cost, - max_budget=litellm.max_budget, - ) + kwargs["litellm_logging_obj"] = logging_obj + _llm_caching_handler: LLMCachingHandler = LLMCachingHandler( + original_function=original_function, + request_kwargs=kwargs, + start_time=start_time, + ) + logging_obj._llm_caching_handler = _llm_caching_handler - # [OPTIONAL] CHECK MAX RETRIES / REQUEST - if litellm.num_retries_per_request is not None: - # check if previous_models passed in as ['litellm_params']['metadata]['previous_models'] - previous_models = kwargs.get("metadata", {}).get( - "previous_models", None + # CHECK FOR 'os.environ/' in kwargs + for k, v in kwargs.items(): + if v is not None and isinstance(v, str) and v.startswith("os.environ/"): + kwargs[k] = litellm.get_secret(v) + # [OPTIONAL] CHECK BUDGET + if litellm.max_budget: + if litellm._current_cost > litellm.max_budget: + raise BudgetExceededError( + current_cost=litellm._current_cost, + max_budget=litellm.max_budget, ) - if previous_models is not None: - if litellm.num_retries_per_request <= len(previous_models): - raise Exception("Max retries per request hit!") - # [OPTIONAL] CHECK CACHE - print_verbose( - f"SYNC kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache')['no-cache']: {kwargs.get('cache', {}).get('no-cache', False)}" - ) - # if caching is false or cache["no-cache"]==True, don't run this - if ( + # [OPTIONAL] CHECK MAX RETRIES / REQUEST + if 
litellm.num_retries_per_request is not None: + # check if previous_models passed in as ['litellm_params']['metadata]['previous_models'] + previous_models = kwargs.get("metadata", {}).get("previous_models", None) + if previous_models is not None: + if litellm.num_retries_per_request <= len(previous_models): + raise Exception("Max retries per request hit!") + + # [OPTIONAL] CHECK CACHE + print_verbose( + f"SYNC kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache')['no-cache']: {kwargs.get('cache', {}).get('no-cache', False)}" + ) + # if caching is false or cache["no-cache"]==True, don't run this + if ( + ( ( - ( - ( - kwargs.get("caching", None) is None - and litellm.cache is not None - ) - or kwargs.get("caching", False) is True - ) - and kwargs.get("cache", {}).get("no-cache", False) is not True + (kwargs.get("caching", None) is None and litellm.cache is not None) + or kwargs.get("caching", False) is True ) - and kwargs.get("aembedding", False) is not True - and kwargs.get("atext_completion", False) is not True - and kwargs.get("acompletion", False) is not True - and kwargs.get("aimg_generation", False) is not True - and kwargs.get("atranscription", False) is not True - and kwargs.get("arerank", False) is not True - and kwargs.get("_arealtime", False) is not True - ): # allow users to control returning cached responses from the completion function - # checking cache - verbose_logger.debug("INSIDE CHECKING SYNC CACHE") - caching_handler_response: CachingHandlerResponse = ( - _llm_caching_handler._sync_get_cache( - model=model or "", - original_function=original_function, - logging_obj=logging_obj, - start_time=start_time, - call_type=call_type, - kwargs=kwargs, - args=args, - ) + and kwargs.get("cache", {}).get("no-cache", False) is not True + ) + and kwargs.get("aembedding", False) is not True + and kwargs.get("atext_completion", False) is not True + and kwargs.get("acompletion", False) is not True + and kwargs.get("aimg_generation", False) is not True + and kwargs.get("atranscription", False) is not True + and kwargs.get("arerank", False) is not True + and kwargs.get("_arealtime", False) is not True + ): # allow users to control returning cached responses from the completion function + # checking cache + verbose_logger.debug("INSIDE CHECKING SYNC CACHE") + caching_handler_response: CachingHandlerResponse = ( + _llm_caching_handler._sync_get_cache( + model=model or "", + original_function=original_function, + logging_obj=logging_obj, + start_time=start_time, + call_type=call_type, + kwargs=kwargs, + args=args, ) + ) - if caching_handler_response.cached_result is not None: - return caching_handler_response.cached_result + if caching_handler_response.cached_result is not None: + return caching_handler_response.cached_result - # CHECK MAX TOKENS - if ( - kwargs.get("max_tokens", None) is not None - and model is not None - and litellm.modify_params - is True # user is okay with params being modified - and ( - call_type == CallTypes.acompletion.value - or call_type == CallTypes.completion.value + # CHECK MAX TOKENS + if ( + kwargs.get("max_tokens", None) is not None + and model is not None + and litellm.modify_params is True # user is okay with params being modified + and ( + call_type == CallTypes.acompletion.value + or call_type == CallTypes.completion.value + ) + ): + try: + base_model = model + if kwargs.get("hf_model_name", None) is not None: + base_model = f"huggingface/{kwargs.get('hf_model_name')}" + messages = None + if len(args) > 1: + 
messages = args[1] + elif kwargs.get("messages", None): + messages = kwargs["messages"] + user_max_tokens = kwargs.get("max_tokens") + modified_max_tokens = get_modified_max_tokens( + model=model, + base_model=base_model, + messages=messages, + user_max_tokens=user_max_tokens, + buffer_num=None, + buffer_perc=None, ) - ): - try: - base_model = model - if kwargs.get("hf_model_name", None) is not None: - base_model = f"huggingface/{kwargs.get('hf_model_name')}" - messages = None - if len(args) > 1: - messages = args[1] - elif kwargs.get("messages", None): - messages = kwargs["messages"] - user_max_tokens = kwargs.get("max_tokens") - modified_max_tokens = get_modified_max_tokens( - model=model, - base_model=base_model, - messages=messages, - user_max_tokens=user_max_tokens, - buffer_num=None, - buffer_perc=None, - ) - kwargs["max_tokens"] = modified_max_tokens - except Exception as e: - print_verbose(f"Error while checking max token limit: {str(e)}") - # MODEL CALL - result = original_function(*args, **{**kwargs, **{"model": model}}) - end_time = datetime.datetime.now() - if "stream" in kwargs and kwargs["stream"] is True: - if ( - "complete_response" in kwargs - and kwargs["complete_response"] is True - ): - chunks = [] - for idx, chunk in enumerate(result): - chunks.append(chunk) - return litellm.stream_chunk_builder( - chunks, messages=kwargs.get("messages", None) - ) - else: - return result - elif "acompletion" in kwargs and kwargs["acompletion"] is True: - return result - elif "aembedding" in kwargs and kwargs["aembedding"] is True: - return result - elif "aimg_generation" in kwargs and kwargs["aimg_generation"] is True: - return result - elif "atranscription" in kwargs and kwargs["atranscription"] is True: - return result - elif "aspeech" in kwargs and kwargs["aspeech"] is True: + kwargs["max_tokens"] = modified_max_tokens + except Exception as e: + print_verbose(f"Error while checking max token limit: {str(e)}") + # MODEL CALL + result = original_function(*args, **{**kwargs, **{"model": model}}) + end_time = datetime.datetime.now() + if "stream" in kwargs and kwargs["stream"] is True: + if "complete_response" in kwargs and kwargs["complete_response"] is True: + chunks = [] + for idx, chunk in enumerate(result): + chunks.append(chunk) + return litellm.stream_chunk_builder( + chunks, messages=kwargs.get("messages", None) + ) + else: return result + elif "acompletion" in kwargs and kwargs["acompletion"] is True: + return result + elif "aembedding" in kwargs and kwargs["aembedding"] is True: + return result + elif "aimg_generation" in kwargs and kwargs["aimg_generation"] is True: + return result + elif "atranscription" in kwargs and kwargs["atranscription"] is True: + return result + elif "aspeech" in kwargs and kwargs["aspeech"] is True: + return result - ### POST-CALL RULES ### - post_call_processing( - original_response=result, - model=model or None, - optional_params=kwargs, - ) - - # [OPTIONAL] ADD TO CACHE - _llm_caching_handler.sync_set_cache( - result=result, - args=args, - kwargs=kwargs, - ) + ### POST-CALL RULES ### + post_call_processing( + original_response=result, + model=model or None, + optional_params=kwargs, + ) - # LOG SUCCESS - handle streaming success logging in the _next_ object, remove `handle_success` once it's deprecated - verbose_logger.info("Wrapper: Completed Call, calling success_handler") - threading.Thread( - target=logging_obj.success_handler, args=(result, start_time, end_time) - ).start() - # RETURN RESULT - if hasattr(result, "_hidden_params"): - 
result._hidden_params["model_id"] = kwargs.get("model_info", {}).get( - "id", None - ) - result._hidden_params["api_base"] = get_api_base( - model=model or "", - optional_params=getattr(logging_obj, "optional_params", {}), - ) - result._hidden_params["response_cost"] = ( - logging_obj._response_cost_calculator(result=result) - ) + # [OPTIONAL] ADD TO CACHE + _llm_caching_handler.sync_set_cache( + result=result, + args=args, + kwargs=kwargs, + ) - result._hidden_params["additional_headers"] = process_response_headers( - result._hidden_params.get("additional_headers") or {} - ) # GUARANTEE OPENAI HEADERS IN RESPONSE - if result is not None: - result._response_ms = ( - end_time - start_time - ).total_seconds() * 1000 # return response latency in ms like openai - return result - except Exception as e: - call_type = original_function.__name__ - if call_type == CallTypes.completion.value: - num_retries = ( - kwargs.get("num_retries", None) or litellm.num_retries or None - ) - litellm.num_retries = ( - None # set retries to None to prevent infinite loops - ) - context_window_fallback_dict = kwargs.get( - "context_window_fallback_dict", {} - ) + # LOG SUCCESS - handle streaming success logging in the _next_ object, remove `handle_success` once it's deprecated + verbose_logger.info("Wrapper: Completed Call, calling success_handler") + threading.Thread( + target=logging_obj.success_handler, args=(result, start_time, end_time) + ).start() + # RETURN RESULT + if hasattr(result, "_hidden_params"): + result._hidden_params["model_id"] = kwargs.get("model_info", {}).get( + "id", None + ) + result._hidden_params["api_base"] = get_api_base( + model=model or "", + optional_params=getattr(logging_obj, "optional_params", {}), + ) + result._hidden_params["response_cost"] = ( + logging_obj._response_cost_calculator(result=result) + ) - _is_litellm_router_call = "model_group" in kwargs.get( - "metadata", {} - ) # check if call from litellm.router/proxy - if ( - num_retries and not _is_litellm_router_call - ): # only enter this if call is not from litellm router/proxy. router has it's own logic for retrying - if ( - isinstance(e, openai.APIError) - or isinstance(e, openai.Timeout) - or isinstance(e, openai.APIConnectionError) - ): - kwargs["num_retries"] = num_retries - return litellm.completion_with_retries(*args, **kwargs) - elif ( - isinstance(e, litellm.exceptions.ContextWindowExceededError) - and context_window_fallback_dict - and model in context_window_fallback_dict - and not _is_litellm_router_call - ): - if len(args) > 0: - args[0] = context_window_fallback_dict[model] # type: ignore - else: - kwargs["model"] = context_window_fallback_dict[model] - return original_function(*args, **kwargs) - traceback_exception = traceback.format_exc() - end_time = datetime.datetime.now() - - # LOG FAILURE - handle streaming failure logging in the _next_ object, remove `handle_failure` once it's deprecated - if logging_obj: - logging_obj.failure_handler( - e, traceback_exception, start_time, end_time - ) # DO NOT MAKE THREADED - router retry fallback relies on this! 
- raise e + result._hidden_params["additional_headers"] = process_response_headers( + result._hidden_params.get("additional_headers") or {} + ) # GUARANTEE OPENAI HEADERS IN RESPONSE + if result is not None: + result._response_ms = ( + end_time - start_time + ).total_seconds() * 1000 # return response latency in ms like openai + return result @wraps(original_function) async def wrapper_async(*args, **kwargs): @@ -1003,10 +947,12 @@ async def wrapper_async(*args, **kwargs): async def get_healthy_deployments(*args, **kwargs): return [], [] - return await run_async_with_retries( + return await async_run_with_retries( original_function=lambda *args, **kwargs: _wrapper_async( original_function, model, *args, **kwargs ), + original_function_args=args, + original_function_kwargs=kwargs, num_retries=kwargs.get("num_retries", 0) or litellm.num_retries or 0, retry_after=0, retry_policy=kwargs.get("retry_policy"), @@ -1020,8 +966,6 @@ async def get_healthy_deployments(*args, **kwargs): # TODO: Explain log_retry=lambda kwargs, _: kwargs, model_list=[], - *args, - **kwargs, ) async def _wrapper_async( From 203db9e3f9fdfaa63f9ae6cf639239224493d87b Mon Sep 17 00:00:00 2001 From: dbczumar Date: Tue, 26 Nov 2024 01:33:52 -0800 Subject: [PATCH 07/27] fix Signed-off-by: dbczumar --- litellm/router_utils/retry_utils.py | 2 +- litellm/utils.py | 32 +++++++++++++++++++++++------ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/litellm/router_utils/retry_utils.py b/litellm/router_utils/retry_utils.py index 70242f75487e..10a33cfecbb9 100644 --- a/litellm/router_utils/retry_utils.py +++ b/litellm/router_utils/retry_utils.py @@ -126,7 +126,7 @@ async def async_run_with_retries( ## LOGGING if num_retries > 0: original_function_kwargs = log_retry( - original_function_kwargs=original_function_kwargs, + kwargs=original_function_kwargs, e=original_exception, ) else: diff --git a/litellm/utils.py b/litellm/utils.py index eac43307d887..708a33f45287 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -687,7 +687,18 @@ def wrapper(*args, **kwargs): raise ValueError("model param not passed in.") async def get_healthy_deployments(*args, **kwargs): - return [], [] + """ + Function to return a mock healthy LiteLLM Router deployment for consumption + during retry logic. + + Returns: + - A list of healthy deployments containing one mock deployment corresponding to the model + - A list of deployments containing one mock deployment corresponding to the model + """ + mock_deployment = { + "model_name": model, + } + return [mock_deployment], [mock_deployment] return run_with_retries( original_function=lambda *args, **kwargs: _wrapper( @@ -706,7 +717,7 @@ async def get_healthy_deployments(*args, **kwargs): # TODO: Explain get_healthy_deployments=get_healthy_deployments, # TODO: Explain - log_retry=lambda kwargs, _: kwargs, + log_retry=lambda kwargs, e: kwargs, model_list=[], ) @@ -945,7 +956,18 @@ async def wrapper_async(*args, **kwargs): raise ValueError("model param not passed in.") async def get_healthy_deployments(*args, **kwargs): - return [], [] + """ + Function to return a mock healthy LiteLLM Router deployment for consumption + during retry logic. 
+ + Returns: + - A list of healthy deployments containing one mock deployment corresponding to the model + - A list of deployments containing one mock deployment corresponding to the model + """ + mock_deployment = { + "model_name": model, + } + return [mock_deployment], [mock_deployment] return await async_run_with_retries( original_function=lambda *args, **kwargs: _wrapper_async( @@ -961,10 +983,8 @@ async def get_healthy_deployments(*args, **kwargs): model, [] ), content_policy_fallbacks=[], - # TODO: Explain get_healthy_deployments=get_healthy_deployments, - # TODO: Explain - log_retry=lambda kwargs, _: kwargs, + log_retry=lambda kwargs, e: kwargs, model_list=[], ) From 91369cbeddc32a29edc35f04abfcb25b586a79b9 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Tue, 26 Nov 2024 01:39:08 -0800 Subject: [PATCH 08/27] fix Signed-off-by: dbczumar --- litellm/router.py | 3 +++ litellm/router_utils/retry_utils.py | 4 ---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index 34b74e0bf873..2d9d2ff37dca 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2887,6 +2887,9 @@ async def async_function_with_retries(self, *args, **kwargs): # noqa: PLR0915 if isinstance(retry_policy, dict): retry_policy = RetryPolicy(**retry_policy) + verbose_router_logger.debug( + f"async function w/ retries: original_function - {original_function}, num_retries - {num_retries}" + ) return await async_run_with_retries( original_function=original_function, original_function_args=args, diff --git a/litellm/router_utils/retry_utils.py b/litellm/router_utils/retry_utils.py index 10a33cfecbb9..3edab6fb3245 100644 --- a/litellm/router_utils/retry_utils.py +++ b/litellm/router_utils/retry_utils.py @@ -80,10 +80,6 @@ async def async_run_with_retries( if model_list is not None: _metadata.update({"model_group_size": len(model_list)}) - # TODO: Figure out logging - take a logger arg? - # verbose_router_logger.debug( - # f"async function w/ retries: original_function - {original_function}, num_retries - {num_retries}" - # ) try: handle_mock_testing_rate_limit_error( model_group=model_group, kwargs=original_function_kwargs From 63c041a74a653d8fcf3f0eb6135338bd8f8c0aae Mon Sep 17 00:00:00 2001 From: dbczumar Date: Tue, 26 Nov 2024 02:04:08 -0800 Subject: [PATCH 09/27] fix Signed-off-by: dbczumar --- litellm/router_utils/retry_utils.py | 8 +- litellm/utils.py | 154 +++++++++++++++++----------- 2 files changed, 99 insertions(+), 63 deletions(-) diff --git a/litellm/router_utils/retry_utils.py b/litellm/router_utils/retry_utils.py index 3edab6fb3245..bd0f8edd6522 100644 --- a/litellm/router_utils/retry_utils.py +++ b/litellm/router_utils/retry_utils.py @@ -26,6 +26,9 @@ def run_with_retries( log_retry: Callable, model_list: Optional[List[DeploymentTypedDict]], ): + """ + Runs the specified function with retries and fallbacks. + """ async_task = async_run_with_retries( original_function=original_function, original_function_args=original_function_args, @@ -70,7 +73,10 @@ async def async_run_with_retries( get_healthy_deployments: Callable, log_retry: Callable, model_list: Optional[List[DeploymentTypedDict]], -): # noqa: PLR0915 +): + """ + Runs the specified function asynchronously with retries and fallbacks. 
+ """ parent_otel_span = _get_parent_otel_span_from_kwargs(original_function_kwargs) ## ADD MODEL GROUP SIZE TO METADATA - used for model_group_rate_limit_error tracking diff --git a/litellm/utils.py b/litellm/utils.py index 708a33f45287..cd7aec785ac0 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -672,37 +672,57 @@ def post_call_processing(original_response, model, optional_params: Optional[dic except Exception as e: raise e - @wraps(original_function) - def wrapper(*args, **kwargs): - model: Optional[str] = None + async def _get_mock_healthy_deployments(model): + """ + Returns a mock healthy LiteLLM Router deployment for consumption during retry logic. + + Returns: + - A list of healthy deployments containing one mock deployment corresponding to + the model + - A list of deployments containing one mock deployment corresponding to the model. + """ + mock_deployment = { + "model_name": model, + } + return [mock_deployment], [mock_deployment] + + def _get_model_from_wrapper_args( + call_type, empty_model_call_types, args, kwargs + ) -> Optional[str]: + """ + Fetches the name of the model from the arguments passed to the wrapper function. + + Args: + call_type: The name of the LiteLLM call type (text completion, image generation, etc.). + empty_model_call_types: A list of call types that do not require a model. If the model + is not found in the arguments, the function will return None if + the call type is in this list, else it will raise an error. + args: The positional arguments passed to the wrapper function. + kwargs: The keyword arguments passed to the wrapper function. + """ + model = None try: - model = args[0] if len(args) > 0 else kwargs.pop("model") + model = args[0] if len(args) > 0 else kwargs["model"] except Exception: model = None - call_type = original_function.__name__ - if ( - call_type != CallTypes.image_generation.value - and call_type != CallTypes.text_completion.value - ): + if call_type not in empty_model_call_types: raise ValueError("model param not passed in.") + return model - async def get_healthy_deployments(*args, **kwargs): - """ - Function to return a mock healthy LiteLLM Router deployment for consumption - during retry logic. - - Returns: - - A list of healthy deployments containing one mock deployment corresponding to the model - - A list of deployments containing one mock deployment corresponding to the model - """ - mock_deployment = { - "model_name": model, - } - return [mock_deployment], [mock_deployment] - + @wraps(original_function) + def wrapper(*args, **kwargs): + model = _get_model_from_wrapper_args( + call_type=original_function.__name__, + empty_model_call_types=[ + CallTypes.image_generation.value, + CallTypes.text_completion.value, + ], + args=args, + kwargs=kwargs, + ) return run_with_retries( original_function=lambda *args, **kwargs: _wrapper( - original_function, model, *args, **kwargs + original_function, *args, **kwargs ), original_function_args=args, original_function_kwargs=kwargs, @@ -714,14 +734,14 @@ async def get_healthy_deployments(*args, **kwargs): model, [] ), content_policy_fallbacks=[], - # TODO: Explain - get_healthy_deployments=get_healthy_deployments, - # TODO: Explain + get_healthy_deployments=lambda *args, **kwargs: _get_mock_healthy_deployments( + model + ), log_retry=lambda kwargs, e: kwargs, model_list=[], ) - def _wrapper(original_function, model, *args, **kwargs): # noqa: PLR0915 + def _wrapper(original_function, *args, **kwargs): # noqa: PLR0915 # DO NOT MOVE THIS. 
It always needs to run first # Check if this is an async function. If so only execute the async function if ( @@ -775,6 +795,16 @@ def _wrapper(original_function, model, *args, **kwargs): # noqa: PLR0915 if "litellm_call_id" not in kwargs: kwargs["litellm_call_id"] = str(uuid.uuid4()) + model = _get_model_from_wrapper_args( + call_type=original_function.__name__, + empty_model_call_types=[ + CallTypes.image_generation.value, + CallTypes.text_completion.value, + ], + args=args, + kwargs=kwargs, + ) + if logging_obj is None: logging_obj, kwargs = function_setup( original_function.__name__, rules_obj, start_time, *args, **kwargs @@ -877,7 +907,7 @@ def _wrapper(original_function, model, *args, **kwargs): # noqa: PLR0915 except Exception as e: print_verbose(f"Error while checking max token limit: {str(e)}") # MODEL CALL - result = original_function(*args, **{**kwargs, **{"model": model}}) + result = original_function(*args, **kwargs) end_time = datetime.datetime.now() if "stream" in kwargs and kwargs["stream"] is True: if "complete_response" in kwargs and kwargs["complete_response"] is True: @@ -943,35 +973,22 @@ def _wrapper(original_function, model, *args, **kwargs): # noqa: PLR0915 @wraps(original_function) async def wrapper_async(*args, **kwargs): - model = "" - try: - model = args[0] if len(args) > 0 else kwargs.pop("model") - except Exception: - call_type = original_function.__name__ - if ( - call_type != CallTypes.aimage_generation.value # model optional - and call_type != CallTypes.atext_completion.value # can also be engine - and call_type != CallTypes.amoderation.value - ): - raise ValueError("model param not passed in.") - - async def get_healthy_deployments(*args, **kwargs): - """ - Function to return a mock healthy LiteLLM Router deployment for consumption - during retry logic. 
- - Returns: - - A list of healthy deployments containing one mock deployment corresponding to the model - - A list of deployments containing one mock deployment corresponding to the model - """ - mock_deployment = { - "model_name": model, - } - return [mock_deployment], [mock_deployment] - + model = ( + _get_model_from_wrapper_args( + call_type=original_function.__name__, + empty_model_call_types=[ + CallTypes.aimage_generation.value, + CallTypes.atext_completion.value, + CallTypes.amoderation.value, + ], + args=args, + kwargs=kwargs, + ) + or "" + ) return await async_run_with_retries( original_function=lambda *args, **kwargs: _wrapper_async( - original_function, model, *args, **kwargs + original_function, *args, **kwargs ), original_function_args=args, original_function_kwargs=kwargs, @@ -983,14 +1000,14 @@ async def get_healthy_deployments(*args, **kwargs): model, [] ), content_policy_fallbacks=[], - get_healthy_deployments=get_healthy_deployments, + get_healthy_deployments=lambda *args, **kwargs: _get_mock_healthy_deployments( + model + ), log_retry=lambda kwargs, e: kwargs, model_list=[], ) - async def _wrapper_async( - original_function, model, *args, **kwargs - ): # noqa: PLR0915 + async def _wrapper_async(original_function, *args, **kwargs): # noqa: PLR0915 print_args_passed_to_litellm(original_function, args, kwargs) start_time = datetime.datetime.now() result = None @@ -1007,6 +1024,20 @@ async def _wrapper_async( if "litellm_call_id" not in kwargs: kwargs["litellm_call_id"] = str(uuid.uuid4()) + model = ( + _get_model_from_wrapper_args( + call_type=original_function.__name__, + empty_model_call_types=[ + CallTypes.aimage_generation.value, + CallTypes.atext_completion.value, + CallTypes.amoderation.value, + ], + args=args, + kwargs=kwargs, + ) + or "" + ) + if logging_obj is None: logging_obj, kwargs = function_setup( original_function.__name__, rules_obj, start_time, *args, **kwargs @@ -1046,8 +1077,7 @@ async def _wrapper_async( return _caching_handler_response.final_embedding_cached_response # MODEL CALL - # TODO: Clean this up! - result = await original_function(*args, **{**kwargs, **{"model": model}}) + result = await original_function(*args, **kwargs) end_time = datetime.datetime.now() if "stream" in kwargs and kwargs["stream"] is True: if "complete_response" in kwargs and kwargs["complete_response"] is True: From 22853589737ffa7fb76929fcb3dd3fe42ebf2609 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Tue, 26 Nov 2024 02:52:40 -0800 Subject: [PATCH 10/27] cases Signed-off-by: dbczumar --- litellm/proxy/_experimental/out/404.html | 1 - .../proxy/_experimental/out/model_hub.html | 1 - .../proxy/_experimental/out/onboarding.html | 1 - .../test_completion_with_retries.py | 106 +++++++++++++++++- 4 files changed, 105 insertions(+), 4 deletions(-) delete mode 100644 litellm/proxy/_experimental/out/404.html delete mode 100644 litellm/proxy/_experimental/out/model_hub.html delete mode 100644 litellm/proxy/_experimental/out/onboarding.html diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html deleted file mode 100644 index 223f5d80e078..000000000000 --- a/litellm/proxy/_experimental/out/404.html +++ /dev/null @@ -1 +0,0 @@ -404: This page could not be found.LiteLLM Dashboard

\ No newline at end of file diff --git a/litellm/proxy/_experimental/out/model_hub.html b/litellm/proxy/_experimental/out/model_hub.html deleted file mode 100644 index 9d1f566c5847..000000000000 --- a/litellm/proxy/_experimental/out/model_hub.html +++ /dev/null @@ -1 +0,0 @@ -LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html deleted file mode 100644 index abd1ff84c5b5..000000000000 --- a/litellm/proxy/_experimental/out/onboarding.html +++ /dev/null @@ -1 +0,0 @@ -LiteLLM Dashboard \ No newline at end of file diff --git a/tests/local_testing/test_completion_with_retries.py b/tests/local_testing/test_completion_with_retries.py index e59d1d6e13ad..b72e39a71800 100644 --- a/tests/local_testing/test_completion_with_retries.py +++ b/tests/local_testing/test_completion_with_retries.py @@ -11,14 +11,49 @@ import pytest import openai import litellm -from litellm import completion_with_retries, completion +from litellm import completion_with_retries, completion, acompletion from litellm import ( AuthenticationError, BadRequestError, + InternalServerError, RateLimitError, ServiceUnavailableError, OpenAIError, + RetryPolicy, + Timeout, ) +from litellm.integrations.custom_logger import CustomLogger + + +class CallCounterHandler(CustomLogger): + success: bool = False + failure: bool = False + api_call_count: int = 0 + + def log_pre_api_call(self, model, messages, kwargs): + print(f"Pre-API Call") + self.api_call_count += 1 + + def log_post_api_call(self, kwargs, response_obj, start_time, end_time): + print( + f"Post-API Call - response object: {response_obj}; model: {kwargs['model']}" + ) + + def log_stream_event(self, kwargs, response_obj, start_time, end_time): + print(f"On Stream") + + def async_log_stream_event(self, kwargs, response_obj, start_time, end_time): + print(f"On Stream") + + def log_success_event(self, kwargs, response_obj, start_time, end_time): + print(f"On Success") + + async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): + print(f"On Success") + + def log_failure_event(self, kwargs, response_obj, start_time, end_time): + print(f"On Failure") + user_message = "Hello, whats the weather in San Francisco??" 
messages = [{"content": user_message, "role": "user"}] @@ -61,3 +96,72 @@ def test_completion_with_0_num_retries(): except Exception as e: print("exception", e) pass + + +@pytest.mark.parametrize("num_retries", [0, 3]) +def test_completion_num_retries(num_retries): + call_counter_handler = CallCounterHandler() + litellm.callbacks = [call_counter_handler] + + with pytest.raises(Exception, match="Invalid Request"): + completion( + model="gpt-3.5-turbo", + messages=[{"gm": "vibe", "role": "user"}], + mock_response=(Exception("Invalid Request")), + num_retries=num_retries, + ) + + assert ( + call_counter_handler.api_call_count == num_retries + 1 + ) # 1 initial call + retries + + +@pytest.mark.parametrize("num_retries", [0, 3]) +async def test_async_completion_num_retries(num_retries): + call_counter_handler = CallCounterHandler() + litellm.callbacks = [call_counter_handler] + + with pytest.raises(Exception, match="Invalid Request"): + await acompletion( + model="gpt-3.5-turbo", + messages=[{"gm": "vibe", "role": "user"}], + mock_response=(Exception("Invalid Request")), + num_retries=num_retries, + ) + + assert ( + call_counter_handler.api_call_count == num_retries + 1 + ) # 1 initial call + retries + + +@pytest.mark.parametrize( + ("Error", "expected_num_retries"), + [ + (RateLimitError, 3), + (Timeout, 2), + ], +) +def test_completion_retry_policy(Error, expected_num_retries): + call_counter_handler = CallCounterHandler() + litellm.callbacks = [call_counter_handler] + retry_policy = RetryPolicy( + RateLimitErrorRetries=3, + TimeoutErrorRetries=2, + ) + + with pytest.raises(Error): + completion( + model="gpt-3.5-turbo", + messages=[{"gm": "vibe", "role": "user"}], + mock_response=( + Error(message="Bad!", llm_provider="openai", model="gpt-3.5-turbo") + ), + # Verify that the retry policy is used instead of the num_retries parameter + # when both are provided + # num_retries=100, + retry_policy=retry_policy, + ) + + assert ( + call_counter_handler.api_call_count == expected_num_retries + 1 + ) # 1 initial call + retries From adea250dcfe68673dffacdbf7966e01f4536a9c1 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Tue, 26 Nov 2024 02:54:40 -0800 Subject: [PATCH 11/27] fix Signed-off-by: dbczumar --- .../test_completion_with_retries.py | 37 ++++++++++++++++++- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/tests/local_testing/test_completion_with_retries.py b/tests/local_testing/test_completion_with_retries.py index b72e39a71800..1e3007f316bd 100644 --- a/tests/local_testing/test_completion_with_retries.py +++ b/tests/local_testing/test_completion_with_retries.py @@ -138,7 +138,7 @@ async def test_async_completion_num_retries(num_retries): ("Error", "expected_num_retries"), [ (RateLimitError, 3), - (Timeout, 2), + (Timeout, 1), ], ) def test_completion_retry_policy(Error, expected_num_retries): @@ -146,7 +146,7 @@ def test_completion_retry_policy(Error, expected_num_retries): litellm.callbacks = [call_counter_handler] retry_policy = RetryPolicy( RateLimitErrorRetries=3, - TimeoutErrorRetries=2, + TimeoutErrorRetries=1, ) with pytest.raises(Error): @@ -165,3 +165,36 @@ def test_completion_retry_policy(Error, expected_num_retries): assert ( call_counter_handler.api_call_count == expected_num_retries + 1 ) # 1 initial call + retries + + +@pytest.mark.parametrize( + ("Error", "expected_num_retries"), + [ + (RateLimitError, 3), + (Timeout, 2), + ], +) +async def test_async_completion_retry_policy(Error, expected_num_retries): + call_counter_handler = CallCounterHandler() + 
litellm.callbacks = [call_counter_handler] + retry_policy = RetryPolicy( + RateLimitErrorRetries=3, + TimeoutErrorRetries=1, + ) + + with pytest.raises(Error): + await completion( + model="gpt-3.5-turbo", + messages=[{"gm": "vibe", "role": "user"}], + mock_response=( + Error(message="Bad!", llm_provider="openai", model="gpt-3.5-turbo") + ), + # Verify that the retry policy is used instead of the num_retries parameter + # when both are provided + # num_retries=100, + retry_policy=retry_policy, + ) + + assert ( + call_counter_handler.api_call_count == expected_num_retries + 1 + ) # 1 initial call + retries From 45b2e2ba39e08fd0cafa16cd20a8fc045cb6add3 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Tue, 26 Nov 2024 02:55:18 -0800 Subject: [PATCH 12/27] revert unintended Signed-off-by: dbczumar --- litellm/proxy/_experimental/out/404.html | 1 + litellm/proxy/_experimental/out/model_hub.html | 1 + litellm/proxy/_experimental/out/onboarding.html | 1 + 3 files changed, 3 insertions(+) create mode 100644 litellm/proxy/_experimental/out/404.html create mode 100644 litellm/proxy/_experimental/out/model_hub.html create mode 100644 litellm/proxy/_experimental/out/onboarding.html diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html new file mode 100644 index 000000000000..223f5d80e078 --- /dev/null +++ b/litellm/proxy/_experimental/out/404.html @@ -0,0 +1 @@ +404: This page could not be found.LiteLLM Dashboard

\ No newline at end of file diff --git a/litellm/proxy/_experimental/out/model_hub.html b/litellm/proxy/_experimental/out/model_hub.html new file mode 100644 index 000000000000..9d1f566c5847 --- /dev/null +++ b/litellm/proxy/_experimental/out/model_hub.html @@ -0,0 +1 @@ +LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html new file mode 100644 index 000000000000..abd1ff84c5b5 --- /dev/null +++ b/litellm/proxy/_experimental/out/onboarding.html @@ -0,0 +1 @@ +LiteLLM Dashboard \ No newline at end of file From c2817208303e3262278df8d2bcb59a35b83185a1 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Tue, 26 Nov 2024 03:10:38 -0800 Subject: [PATCH 13/27] fix Signed-off-by: dbczumar --- litellm/main.py | 3 + litellm/proxy/_experimental/out/404.html | 1 - .../proxy/_experimental/out/model_hub.html | 1 - .../proxy/_experimental/out/onboarding.html | 1 - litellm/utils.py | 10 ++- .../test_completion_with_retries.py | 63 +++---------------- 6 files changed, 21 insertions(+), 58 deletions(-) delete mode 100644 litellm/proxy/_experimental/out/404.html delete mode 100644 litellm/proxy/_experimental/out/model_hub.html delete mode 100644 litellm/proxy/_experimental/out/onboarding.html diff --git a/litellm/main.py b/litellm/main.py index 5095ce518295..066d0f4bf751 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -372,6 +372,8 @@ async def acompletion( LITELLM Specific Params mock_response (str, optional): If provided, return a mock completion response for testing or debugging purposes (default is None). custom_llm_provider (str, optional): Used for Non-OpenAI LLMs, Example usage for bedrock, set model="amazon.titan-tg1-large" and custom_llm_provider="bedrock" + max_retries (int, optional): The number of retries to attempt (default is 0). + retry_policy (RetryPolicy, optional): The retry policy to use for request failures (default is None). Returns: ModelResponse: A response object containing the generated completion and associated metadata. @@ -768,6 +770,7 @@ def completion( # type: ignore # noqa: PLR0915 mock_response (str, optional): If provided, return a mock completion response for testing or debugging purposes (default is None). custom_llm_provider (str, optional): Used for Non-OpenAI LLMs, Example usage for bedrock, set model="amazon.titan-tg1-large" and custom_llm_provider="bedrock" max_retries (int, optional): The number of retries to attempt (default is 0). + retry_policy (RetryPolicy, optional): The retry policy to use for request failures (default is None). Returns: ModelResponse: A response object containing the generated completion and associated metadata. diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html deleted file mode 100644 index 223f5d80e078..000000000000 --- a/litellm/proxy/_experimental/out/404.html +++ /dev/null @@ -1 +0,0 @@ -404: This page could not be found.LiteLLM Dashboard

\ No newline at end of file diff --git a/litellm/proxy/_experimental/out/model_hub.html b/litellm/proxy/_experimental/out/model_hub.html deleted file mode 100644 index 9d1f566c5847..000000000000 --- a/litellm/proxy/_experimental/out/model_hub.html +++ /dev/null @@ -1 +0,0 @@ -LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html deleted file mode 100644 index abd1ff84c5b5..000000000000 --- a/litellm/proxy/_experimental/out/onboarding.html +++ /dev/null @@ -1 +0,0 @@ -LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/utils.py b/litellm/utils.py index cd7aec785ac0..5a4123376075 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -726,7 +726,10 @@ def wrapper(*args, **kwargs): ), original_function_args=args, original_function_kwargs=kwargs, - num_retries=kwargs.get("num_retries", 0) or litellm.num_retries or 0, + num_retries=kwargs.get("max_retries", 0) + or kwargs.get("num_retries", 0) + or litellm.num_retries + or 0, retry_after=0, retry_policy=kwargs.get("retry_policy"), fallbacks=kwargs.get("fallbacks", []), @@ -992,7 +995,10 @@ async def wrapper_async(*args, **kwargs): ), original_function_args=args, original_function_kwargs=kwargs, - num_retries=kwargs.get("num_retries", 0) or litellm.num_retries or 0, + num_retries=kwargs.get("max_retries", 0) + or kwargs.get("num_retries", 0) + or litellm.num_retries + or 0, retry_after=0, retry_policy=kwargs.get("retry_policy"), fallbacks=kwargs.get("fallbacks", []), diff --git a/tests/local_testing/test_completion_with_retries.py b/tests/local_testing/test_completion_with_retries.py index 1e3007f316bd..cfbec694a09d 100644 --- a/tests/local_testing/test_completion_with_retries.py +++ b/tests/local_testing/test_completion_with_retries.py @@ -55,51 +55,8 @@ def log_failure_event(self, kwargs, response_obj, start_time, end_time): print(f"On Failure") -user_message = "Hello, whats the weather in San Francisco??" 
-messages = [{"content": user_message, "role": "user"}] - - -def logger_fn(user_model_dict): - # print(f"user_model_dict: {user_model_dict}") - pass - - -# completion with num retries + impact on exception mapping -def test_completion_with_num_retries(): - try: - response = completion( - model="j2-ultra", - messages=[{"messages": "vibe", "bad": "message"}], - num_retries=2, - ) - pytest.fail(f"Unmapped exception occurred") - except Exception as e: - pass - - -# test_completion_with_num_retries() -def test_completion_with_0_num_retries(): - try: - litellm.set_verbose = False - print("making request") - - # Use the completion function - response = completion( - model="gpt-3.5-turbo", - messages=[{"gm": "vibe", "role": "user"}], - max_retries=4, - ) - - print(response) - - # print(response) - except Exception as e: - print("exception", e) - pass - - -@pytest.mark.parametrize("num_retries", [0, 3]) -def test_completion_num_retries(num_retries): +@pytest.mark.parametrize("max_retries", [0, 3]) +def test_completion_max_retries(max_retries): call_counter_handler = CallCounterHandler() litellm.callbacks = [call_counter_handler] @@ -108,16 +65,16 @@ def test_completion_num_retries(num_retries): model="gpt-3.5-turbo", messages=[{"gm": "vibe", "role": "user"}], mock_response=(Exception("Invalid Request")), - num_retries=num_retries, + max_retries=max_retries, ) assert ( - call_counter_handler.api_call_count == num_retries + 1 + call_counter_handler.api_call_count == max_retries + 1 ) # 1 initial call + retries -@pytest.mark.parametrize("num_retries", [0, 3]) -async def test_async_completion_num_retries(num_retries): +@pytest.mark.parametrize("max_retries", [0, 3]) +async def test_async_completion_max_retries(max_retries): call_counter_handler = CallCounterHandler() litellm.callbacks = [call_counter_handler] @@ -126,11 +83,11 @@ async def test_async_completion_num_retries(num_retries): model="gpt-3.5-turbo", messages=[{"gm": "vibe", "role": "user"}], mock_response=(Exception("Invalid Request")), - num_retries=num_retries, + max_retries=max_retries, ) assert ( - call_counter_handler.api_call_count == num_retries + 1 + call_counter_handler.api_call_count == max_retries + 1 ) # 1 initial call + retries @@ -158,7 +115,7 @@ def test_completion_retry_policy(Error, expected_num_retries): ), # Verify that the retry policy is used instead of the num_retries parameter # when both are provided - # num_retries=100, + max_retries=100, retry_policy=retry_policy, ) @@ -191,7 +148,7 @@ async def test_async_completion_retry_policy(Error, expected_num_retries): ), # Verify that the retry policy is used instead of the num_retries parameter # when both are provided - # num_retries=100, + max_retries=100, retry_policy=retry_policy, ) From 73fbb9ab82cabe751287891bca4da4e011b963e2 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Tue, 26 Nov 2024 03:13:49 -0800 Subject: [PATCH 14/27] revert Signed-off-by: dbczumar --- litellm/proxy/_experimental/out/404.html | 1 + litellm/proxy/_experimental/out/model_hub.html | 1 + litellm/proxy/_experimental/out/onboarding.html | 1 + 3 files changed, 3 insertions(+) create mode 100644 litellm/proxy/_experimental/out/404.html create mode 100644 litellm/proxy/_experimental/out/model_hub.html create mode 100644 litellm/proxy/_experimental/out/onboarding.html diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html new file mode 100644 index 000000000000..223f5d80e078 --- /dev/null +++ b/litellm/proxy/_experimental/out/404.html @@ -0,0 +1 @@ +404: This page 
could not be found.LiteLLM Dashboard

\ No newline at end of file diff --git a/litellm/proxy/_experimental/out/model_hub.html b/litellm/proxy/_experimental/out/model_hub.html new file mode 100644 index 000000000000..9d1f566c5847 --- /dev/null +++ b/litellm/proxy/_experimental/out/model_hub.html @@ -0,0 +1 @@ +LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html new file mode 100644 index 000000000000..abd1ff84c5b5 --- /dev/null +++ b/litellm/proxy/_experimental/out/onboarding.html @@ -0,0 +1 @@ +LiteLLM Dashboard \ No newline at end of file From 0c2580f4b84b51fe393b31adb4ff45482a1b0468 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Tue, 26 Nov 2024 03:16:07 -0800 Subject: [PATCH 15/27] format Signed-off-by: dbczumar --- litellm/utils.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/litellm/utils.py b/litellm/utils.py index 5a4123376075..83704480686f 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -726,10 +726,12 @@ def wrapper(*args, **kwargs): ), original_function_args=args, original_function_kwargs=kwargs, - num_retries=kwargs.get("max_retries", 0) - or kwargs.get("num_retries", 0) - or litellm.num_retries - or 0, + num_retries=( + kwargs.get("max_retries", 0) + or kwargs.get("num_retries", 0) + or litellm.num_retries + or 0 + ), retry_after=0, retry_policy=kwargs.get("retry_policy"), fallbacks=kwargs.get("fallbacks", []), @@ -995,10 +997,12 @@ async def wrapper_async(*args, **kwargs): ), original_function_args=args, original_function_kwargs=kwargs, - num_retries=kwargs.get("max_retries", 0) - or kwargs.get("num_retries", 0) - or litellm.num_retries - or 0, + num_retries=( + kwargs.get("max_retries", 0) + or kwargs.get("num_retries", 0) + or litellm.num_retries + or 0 + ), retry_after=0, retry_policy=kwargs.get("retry_policy"), fallbacks=kwargs.get("fallbacks", []), From 58489b248177156ec3cbece6cb26783283507fef Mon Sep 17 00:00:00 2001 From: dbczumar Date: Tue, 26 Nov 2024 03:23:04 -0800 Subject: [PATCH 16/27] exc map Signed-off-by: dbczumar --- tests/local_testing/test_completion_with_retries.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/local_testing/test_completion_with_retries.py b/tests/local_testing/test_completion_with_retries.py index cfbec694a09d..d2323395c47e 100644 --- a/tests/local_testing/test_completion_with_retries.py +++ b/tests/local_testing/test_completion_with_retries.py @@ -55,6 +55,19 @@ def log_failure_event(self, kwargs, response_obj, start_time, end_time): print(f"On Failure") +# completion with num retries + impact on exception mapping +def test_completion_exception_mapping_with_num_retries(): + try: + response = completion( + model="j2-ultra", + messages=[{"messages": "vibe", "bad": "message"}], + num_retries=2, + ) + pytest.fail(f"Unmapped exception occurred") + except Exception as e: + pass + + @pytest.mark.parametrize("max_retries", [0, 3]) def test_completion_max_retries(max_retries): call_counter_handler = CallCounterHandler() From d34d91f1cc13ed117af4912a84d4d9b35ddd0a8a Mon Sep 17 00:00:00 2001 From: dbczumar Date: Tue, 26 Nov 2024 03:32:30 -0800 Subject: [PATCH 17/27] Async decorator Signed-off-by: dbczumar --- tests/local_testing/test_completion_with_retries.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/local_testing/test_completion_with_retries.py b/tests/local_testing/test_completion_with_retries.py index d2323395c47e..8231fe86815b 100644 --- a/tests/local_testing/test_completion_with_retries.py 
+++ b/tests/local_testing/test_completion_with_retries.py @@ -86,6 +86,7 @@ def test_completion_max_retries(max_retries): ) # 1 initial call + retries +@pytest.mark.asyncio @pytest.mark.parametrize("max_retries", [0, 3]) async def test_async_completion_max_retries(max_retries): call_counter_handler = CallCounterHandler() @@ -137,6 +138,7 @@ def test_completion_retry_policy(Error, expected_num_retries): ) # 1 initial call + retries +@pytest.mark.asyncio @pytest.mark.parametrize( ("Error", "expected_num_retries"), [ From 3983c16d051b3a2d9e9c23f6f1a279d57d62639f Mon Sep 17 00:00:00 2001 From: dbczumar Date: Tue, 26 Nov 2024 03:34:49 -0800 Subject: [PATCH 18/27] fix Signed-off-by: dbczumar --- litellm/main.py | 1 - .../test_completion_with_retries.py | 21 +------------------ 2 files changed, 1 insertion(+), 21 deletions(-) diff --git a/litellm/main.py b/litellm/main.py index 066d0f4bf751..a16a3ca4d800 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -372,7 +372,6 @@ async def acompletion( LITELLM Specific Params mock_response (str, optional): If provided, return a mock completion response for testing or debugging purposes (default is None). custom_llm_provider (str, optional): Used for Non-OpenAI LLMs, Example usage for bedrock, set model="amazon.titan-tg1-large" and custom_llm_provider="bedrock" - max_retries (int, optional): The number of retries to attempt (default is 0). retry_policy (RetryPolicy, optional): The retry policy to use for request failures (default is None). Returns: ModelResponse: A response object containing the generated completion and associated metadata. diff --git a/tests/local_testing/test_completion_with_retries.py b/tests/local_testing/test_completion_with_retries.py index 8231fe86815b..f433a1a4b16e 100644 --- a/tests/local_testing/test_completion_with_retries.py +++ b/tests/local_testing/test_completion_with_retries.py @@ -86,25 +86,6 @@ def test_completion_max_retries(max_retries): ) # 1 initial call + retries -@pytest.mark.asyncio -@pytest.mark.parametrize("max_retries", [0, 3]) -async def test_async_completion_max_retries(max_retries): - call_counter_handler = CallCounterHandler() - litellm.callbacks = [call_counter_handler] - - with pytest.raises(Exception, match="Invalid Request"): - await acompletion( - model="gpt-3.5-turbo", - messages=[{"gm": "vibe", "role": "user"}], - mock_response=(Exception("Invalid Request")), - max_retries=max_retries, - ) - - assert ( - call_counter_handler.api_call_count == max_retries + 1 - ) # 1 initial call + retries - - @pytest.mark.parametrize( ("Error", "expected_num_retries"), [ @@ -143,7 +124,7 @@ def test_completion_retry_policy(Error, expected_num_retries): ("Error", "expected_num_retries"), [ (RateLimitError, 3), - (Timeout, 2), + (Timeout, 1), ], ) async def test_async_completion_retry_policy(Error, expected_num_retries): From 6979ef4ce873dc9313155f968da6689eb93ee4a0 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Tue, 26 Nov 2024 23:39:09 -0800 Subject: [PATCH 19/27] fix Signed-off-by: dbczumar --- litellm/utils.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/litellm/utils.py b/litellm/utils.py index 83704480686f..80fc717ec189 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -709,6 +709,22 @@ def _get_model_from_wrapper_args( raise ValueError("model param not passed in.") return model + def _get_and_reset_retries_for_wrapper_call(kwargs): + """ + Fetches the number of retries from the kwargs and resets the retries to 0 in the kwargs. 
+ This is used to prevent retry logic from within the original function (e.g. the LiteLLM + OpenAI chat completions provider) from running on top of the retry logic in the wrapper. + """ + num_retries = ( + kwargs.get("max_retries", 0) + or kwargs.get("num_retries", 0) + or litellm.num_retries + or 0 + ) + kwargs["max_retries"] = 0 + kwargs["num_retries"] = 0 + return num_retries + @wraps(original_function) def wrapper(*args, **kwargs): model = _get_model_from_wrapper_args( @@ -720,18 +736,14 @@ def wrapper(*args, **kwargs): args=args, kwargs=kwargs, ) + num_retries = _get_and_reset_retries_for_wrapper_call(kwargs) return run_with_retries( original_function=lambda *args, **kwargs: _wrapper( original_function, *args, **kwargs ), original_function_args=args, original_function_kwargs=kwargs, - num_retries=( - kwargs.get("max_retries", 0) - or kwargs.get("num_retries", 0) - or litellm.num_retries - or 0 - ), + num_retries=num_retries, retry_after=0, retry_policy=kwargs.get("retry_policy"), fallbacks=kwargs.get("fallbacks", []), @@ -991,18 +1003,14 @@ async def wrapper_async(*args, **kwargs): ) or "" ) + num_retries = _get_and_reset_retries_for_wrapper_call(kwargs) return await async_run_with_retries( original_function=lambda *args, **kwargs: _wrapper_async( original_function, *args, **kwargs ), original_function_args=args, original_function_kwargs=kwargs, - num_retries=( - kwargs.get("max_retries", 0) - or kwargs.get("num_retries", 0) - or litellm.num_retries - or 0 - ), + num_retries=num_retries, retry_after=0, retry_policy=kwargs.get("retry_policy"), fallbacks=kwargs.get("fallbacks", []), From 3cc745cc29bd763b66a8d4b3bc04a88737fbd66b Mon Sep 17 00:00:00 2001 From: dbczumar Date: Wed, 27 Nov 2024 02:52:42 -0800 Subject: [PATCH 20/27] fix Signed-off-by: dbczumar --- litellm/proxy/_experimental/out/404.html | 1 - .../proxy/_experimental/out/model_hub.html | 1 - .../proxy/_experimental/out/onboarding.html | 1 - litellm/router_utils/retry_utils.py | 27 ++- litellm/utils.py | 197 +++++++----------- 5 files changed, 102 insertions(+), 125 deletions(-) delete mode 100644 litellm/proxy/_experimental/out/404.html delete mode 100644 litellm/proxy/_experimental/out/model_hub.html delete mode 100644 litellm/proxy/_experimental/out/onboarding.html diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html deleted file mode 100644 index 223f5d80e078..000000000000 --- a/litellm/proxy/_experimental/out/404.html +++ /dev/null @@ -1 +0,0 @@ -404: This page could not be found.LiteLLM Dashboard

\ No newline at end of file diff --git a/litellm/proxy/_experimental/out/model_hub.html b/litellm/proxy/_experimental/out/model_hub.html deleted file mode 100644 index 9d1f566c5847..000000000000 --- a/litellm/proxy/_experimental/out/model_hub.html +++ /dev/null @@ -1 +0,0 @@ -LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html deleted file mode 100644 index abd1ff84c5b5..000000000000 --- a/litellm/proxy/_experimental/out/onboarding.html +++ /dev/null @@ -1 +0,0 @@ -LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/router_utils/retry_utils.py b/litellm/router_utils/retry_utils.py index bd0f8edd6522..a7320933f749 100644 --- a/litellm/router_utils/retry_utils.py +++ b/litellm/router_utils/retry_utils.py @@ -29,6 +29,23 @@ def run_with_retries( """ Runs the specified function with retries and fallbacks. """ + try: + if asyncio.current_task() is not None: + # If run_with_retries (synchronous) is called from within an async function, + # which can happen if a LiteLLM wrapped function with retries calls another + # LiteLLM wrapped function with retries, then we should just run the original + # function directly. The outer function will handle the retries. Otherwise, we would + # erroneously return a couroutine object from run_with_retries instead of the actual + # response, breaking the outer function + return original_function( + *original_function_args, **original_function_kwargs + ) + except RuntimeError: + pass + + # If run_with_retries is called from a synchronous function, we need to run the async version + # and wait for the result. We proceed to get or create an event loop and run the async function + # until completion async_task = async_run_with_retries( original_function=original_function, original_function_args=original_function_args, @@ -44,12 +61,12 @@ def run_with_retries( model_list=model_list, ) try: - # Check if an event loop is already running - loop = asyncio.get_running_loop() - # If running in an async context, return the coroutine for awaiting - return async_task + loop = asyncio.get_event_loop() + if loop.is_running(): + return loop.run_until_complete(async_task) except RuntimeError: - # If no event loop is running, create a new one and run the task + pass + else: loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: diff --git a/litellm/utils.py b/litellm/utils.py index 80fc717ec189..f9e3e909c5e5 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -686,29 +686,6 @@ async def _get_mock_healthy_deployments(model): } return [mock_deployment], [mock_deployment] - def _get_model_from_wrapper_args( - call_type, empty_model_call_types, args, kwargs - ) -> Optional[str]: - """ - Fetches the name of the model from the arguments passed to the wrapper function. - - Args: - call_type: The name of the LiteLLM call type (text completion, image generation, etc.). - empty_model_call_types: A list of call types that do not require a model. If the model - is not found in the arguments, the function will return None if - the call type is in this list, else it will raise an error. - args: The positional arguments passed to the wrapper function. - kwargs: The keyword arguments passed to the wrapper function. 
- """ - model = None - try: - model = args[0] if len(args) > 0 else kwargs["model"] - except Exception: - model = None - if call_type not in empty_model_call_types: - raise ValueError("model param not passed in.") - return model - def _get_and_reset_retries_for_wrapper_call(kwargs): """ Fetches the number of retries from the kwargs and resets the retries to 0 in the kwargs. @@ -726,39 +703,20 @@ def _get_and_reset_retries_for_wrapper_call(kwargs): return num_retries @wraps(original_function) - def wrapper(*args, **kwargs): - model = _get_model_from_wrapper_args( - call_type=original_function.__name__, - empty_model_call_types=[ - CallTypes.image_generation.value, - CallTypes.text_completion.value, - ], - args=args, - kwargs=kwargs, - ) + def wrapper(*args, **kwargs): # noqa: PLR0915 num_retries = _get_and_reset_retries_for_wrapper_call(kwargs) - return run_with_retries( - original_function=lambda *args, **kwargs: _wrapper( - original_function, *args, **kwargs - ), - original_function_args=args, - original_function_kwargs=kwargs, - num_retries=num_retries, - retry_after=0, - retry_policy=kwargs.get("retry_policy"), - fallbacks=kwargs.get("fallbacks", []), - context_window_fallbacks=kwargs.get("context_window_fallback_dict", {}).get( - model, [] - ), - content_policy_fallbacks=[], - get_healthy_deployments=lambda *args, **kwargs: _get_mock_healthy_deployments( - model - ), - log_retry=lambda kwargs, e: kwargs, - model_list=[], - ) + call_type = original_function.__name__ + model: Optional[str] = None + try: + model = args[0] if len(args) > 0 else kwargs["model"] + except Exception: + model = None + if ( + call_type != CallTypes.image_generation.value + and call_type != CallTypes.text_completion.value + ): + raise ValueError("model param not passed in.") - def _wrapper(original_function, *args, **kwargs): # noqa: PLR0915 # DO NOT MOVE THIS. It always needs to run first # Check if this is an async function. 
If so only execute the async function if ( @@ -782,7 +740,24 @@ def _wrapper(original_function, *args, **kwargs): # noqa: PLR0915 raise Exception("Max retries per request hit!") # MODEL CALL - result = original_function(*args, **kwargs) + result = run_with_retries( + original_function=original_function, + original_function_args=args, + original_function_kwargs=kwargs, + num_retries=num_retries, + retry_after=0, + retry_policy=kwargs.get("retry_policy"), + fallbacks=kwargs.get("fallbacks", []), + context_window_fallbacks=kwargs.get( + "context_window_fallback_dict", {} + ).get(model, []), + content_policy_fallbacks=[], + get_healthy_deployments=lambda *args, **kwargs: _get_mock_healthy_deployments( + model + ), + log_retry=lambda kwargs, e: kwargs, + model_list=[], + ) if "stream" in kwargs and kwargs["stream"] is True: if ( "complete_response" in kwargs @@ -812,16 +787,6 @@ def _wrapper(original_function, *args, **kwargs): # noqa: PLR0915 if "litellm_call_id" not in kwargs: kwargs["litellm_call_id"] = str(uuid.uuid4()) - model = _get_model_from_wrapper_args( - call_type=original_function.__name__, - empty_model_call_types=[ - CallTypes.image_generation.value, - CallTypes.text_completion.value, - ], - args=args, - kwargs=kwargs, - ) - if logging_obj is None: logging_obj, kwargs = function_setup( original_function.__name__, rules_obj, start_time, *args, **kwargs @@ -924,7 +889,24 @@ def _wrapper(original_function, *args, **kwargs): # noqa: PLR0915 except Exception as e: print_verbose(f"Error while checking max token limit: {str(e)}") # MODEL CALL - result = original_function(*args, **kwargs) + result = run_with_retries( + original_function=original_function, + original_function_args=args, + original_function_kwargs=kwargs, + num_retries=num_retries, + retry_after=0, + retry_policy=kwargs.get("retry_policy"), + fallbacks=kwargs.get("fallbacks", []), + context_window_fallbacks=kwargs.get("context_window_fallback_dict", {}).get( + model, [] + ), + content_policy_fallbacks=[], + get_healthy_deployments=lambda *args, **kwargs: _get_mock_healthy_deployments( + model + ), + log_retry=lambda kwargs, e: kwargs, + model_list=[], + ) end_time = datetime.datetime.now() if "stream" in kwargs and kwargs["stream"] is True: if "complete_response" in kwargs and kwargs["complete_response"] is True: @@ -989,43 +971,7 @@ def _wrapper(original_function, *args, **kwargs): # noqa: PLR0915 return result @wraps(original_function) - async def wrapper_async(*args, **kwargs): - model = ( - _get_model_from_wrapper_args( - call_type=original_function.__name__, - empty_model_call_types=[ - CallTypes.aimage_generation.value, - CallTypes.atext_completion.value, - CallTypes.amoderation.value, - ], - args=args, - kwargs=kwargs, - ) - or "" - ) - num_retries = _get_and_reset_retries_for_wrapper_call(kwargs) - return await async_run_with_retries( - original_function=lambda *args, **kwargs: _wrapper_async( - original_function, *args, **kwargs - ), - original_function_args=args, - original_function_kwargs=kwargs, - num_retries=num_retries, - retry_after=0, - retry_policy=kwargs.get("retry_policy"), - fallbacks=kwargs.get("fallbacks", []), - context_window_fallbacks=kwargs.get("context_window_fallback_dict", {}).get( - model, [] - ), - content_policy_fallbacks=[], - get_healthy_deployments=lambda *args, **kwargs: _get_mock_healthy_deployments( - model - ), - log_retry=lambda kwargs, e: kwargs, - model_list=[], - ) - - async def _wrapper_async(original_function, *args, **kwargs): # noqa: PLR0915 + async def wrapper_async(*args, 
**kwargs): # noqa: PLR0915 print_args_passed_to_litellm(original_function, args, kwargs) start_time = datetime.datetime.now() result = None @@ -1042,19 +988,16 @@ async def _wrapper_async(original_function, *args, **kwargs): # noqa: PLR0915 if "litellm_call_id" not in kwargs: kwargs["litellm_call_id"] = str(uuid.uuid4()) - model = ( - _get_model_from_wrapper_args( - call_type=original_function.__name__, - empty_model_call_types=[ - CallTypes.aimage_generation.value, - CallTypes.atext_completion.value, - CallTypes.amoderation.value, - ], - args=args, - kwargs=kwargs, - ) - or "" - ) + model = "" + try: + model = args[0] if len(args) > 0 else kwargs["model"] + except Exception: + if ( + call_type != CallTypes.aimage_generation.value # model optional + and call_type != CallTypes.atext_completion.value # can also be engine + and call_type != CallTypes.amoderation.value + ): + raise ValueError("model param not passed in.") if logging_obj is None: logging_obj, kwargs = function_setup( @@ -1095,7 +1038,27 @@ async def _wrapper_async(original_function, *args, **kwargs): # noqa: PLR0915 return _caching_handler_response.final_embedding_cached_response # MODEL CALL - result = await original_function(*args, **kwargs) + num_retries = _get_and_reset_retries_for_wrapper_call(kwargs) + result = async_run_with_retries( + original_function=lambda *args, **kwargs: _wrapper_async( + original_function, *args, **kwargs + ), + original_function_args=args, + original_function_kwargs=kwargs, + num_retries=num_retries, + retry_after=0, + retry_policy=kwargs.get("retry_policy"), + fallbacks=kwargs.get("fallbacks", []), + context_window_fallbacks=kwargs.get("context_window_fallback_dict", {}).get( + model, [] + ), + content_policy_fallbacks=[], + get_healthy_deployments=lambda *args, **kwargs: _get_mock_healthy_deployments( + model + ), + log_retry=lambda kwargs, e: kwargs, + model_list=[], + ) end_time = datetime.datetime.now() if "stream" in kwargs and kwargs["stream"] is True: if "complete_response" in kwargs and kwargs["complete_response"] is True: From 070ef8695fd8e6db9910e228b862242973101060 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Wed, 27 Nov 2024 02:53:06 -0800 Subject: [PATCH 21/27] fix Signed-off-by: dbczumar --- litellm/proxy/_experimental/out/404.html | 1 + litellm/proxy/_experimental/out/model_hub.html | 1 + litellm/proxy/_experimental/out/onboarding.html | 1 + 3 files changed, 3 insertions(+) create mode 100644 litellm/proxy/_experimental/out/404.html create mode 100644 litellm/proxy/_experimental/out/model_hub.html create mode 100644 litellm/proxy/_experimental/out/onboarding.html diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html new file mode 100644 index 000000000000..223f5d80e078 --- /dev/null +++ b/litellm/proxy/_experimental/out/404.html @@ -0,0 +1 @@ +404: This page could not be found.LiteLLM Dashboard

\ No newline at end of file diff --git a/litellm/proxy/_experimental/out/model_hub.html b/litellm/proxy/_experimental/out/model_hub.html new file mode 100644 index 000000000000..9d1f566c5847 --- /dev/null +++ b/litellm/proxy/_experimental/out/model_hub.html @@ -0,0 +1 @@ +LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html new file mode 100644 index 000000000000..abd1ff84c5b5 --- /dev/null +++ b/litellm/proxy/_experimental/out/onboarding.html @@ -0,0 +1 @@ +LiteLLM Dashboard \ No newline at end of file From e69e11f178158dbb08706343422d6b0b0fe976d8 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Wed, 27 Nov 2024 03:05:45 -0800 Subject: [PATCH 22/27] fix Signed-off-by: dbczumar --- litellm/router_utils/retry_utils.py | 14 +++++++------- litellm/utils.py | 6 ++---- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/litellm/router_utils/retry_utils.py b/litellm/router_utils/retry_utils.py index a7320933f749..1add9a8971f2 100644 --- a/litellm/router_utils/retry_utils.py +++ b/litellm/router_utils/retry_utils.py @@ -66,13 +66,13 @@ def run_with_retries( return loop.run_until_complete(async_task) except RuntimeError: pass - else: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - try: - return loop.run_until_complete(async_task) - finally: - loop.close() + + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + return loop.run_until_complete(async_task) + finally: + loop.close() async def async_run_with_retries( diff --git a/litellm/utils.py b/litellm/utils.py index f9e3e909c5e5..b4b54e03652b 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1039,10 +1039,8 @@ async def wrapper_async(*args, **kwargs): # noqa: PLR0915 # MODEL CALL num_retries = _get_and_reset_retries_for_wrapper_call(kwargs) - result = async_run_with_retries( - original_function=lambda *args, **kwargs: _wrapper_async( - original_function, *args, **kwargs - ), + result = await async_run_with_retries( + original_function=original_function, original_function_args=args, original_function_kwargs=kwargs, num_retries=num_retries, From 3f16f1950161c72cbb7e8fe5867afc1e21a86ab4 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Wed, 27 Nov 2024 03:07:59 -0800 Subject: [PATCH 23/27] fix Signed-off-by: dbczumar --- litellm/router_utils/retry_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/litellm/router_utils/retry_utils.py b/litellm/router_utils/retry_utils.py index 1add9a8971f2..9c3ec95bb9eb 100644 --- a/litellm/router_utils/retry_utils.py +++ b/litellm/router_utils/retry_utils.py @@ -6,6 +6,7 @@ import openai import litellm +from litellm._logging import verbose_router_logger from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs from litellm.types.router import DeploymentTypedDict, RetryPolicy @@ -287,10 +288,9 @@ def handle_mock_testing_rate_limit_error( mock_testing_rate_limit_error is not None and mock_testing_rate_limit_error is True ): - # TODO: Figure out logging - take a logger arg? 
- # verbose_router_logger.info( - # f"litellm.router.py::_mock_rate_limit_error() - Raising mock RateLimitError for model={model_group}" - # ) + verbose_router_logger.info( + f"litellm.router.py::_mock_rate_limit_error() - Raising mock RateLimitError for model={model_group}" + ) raise litellm.RateLimitError( model=model_group, llm_provider="", From 63e171ca5d498d48c103a3eac29190a452f28e54 Mon Sep 17 00:00:00 2001 From: dbczumar Date: Sun, 1 Dec 2024 22:07:24 -0800 Subject: [PATCH 24/27] fix Signed-off-by: dbczumar --- .circleci/config.yml | 3 +- docs/my-website/docs/moderation.md | 135 ++++++++ docs/my-website/docs/observability/argilla.md | 49 ++- docs/my-website/docs/proxy/config_settings.md | 28 +- docs/my-website/docs/proxy/configs.md | 71 ---- docs/my-website/docs/proxy/db_info.md | 14 +- docs/my-website/docs/proxy/prod.md | 14 +- docs/my-website/docs/proxy/prometheus.md | 10 + docs/my-website/docs/routing.md | 19 + docs/my-website/docs/text_completion.md | 174 ++++++++++ docs/my-website/docs/wildcard_routing.md | 140 ++++++++ docs/my-website/sidebars.js | 4 +- enterprise/utils.py | 39 +-- litellm/integrations/datadog/datadog.py | 44 ++- .../bedrock/chat/converse_transformation.py | 2 +- litellm/llms/custom_httpx/http_handler.py | 77 ++++- litellm/llms/prompt_templates/factory.py | 39 ++- .../gemini/transformation.py | 4 + ...odel_prices_and_context_window_backup.json | 66 +++- litellm/proxy/_experimental/out/404.html | 1 - .../static/chunks/131-3d2257b0ff5aadb2.js | 8 - .../static/chunks/131-4ee1d633e8928742.js | 8 + .../static/chunks/626-0c564a21577c9c53.js | 13 + .../static/chunks/626-4e8df4039ecf4386.js | 13 - ...3027703b2e8.js => 902-292bb6a83427dbc7.js} | 0 .../chunks/app/layout-05e5448bd170dbcb.js | 1 + .../chunks/app/layout-61827b157521da1b.js | 1 - ...a6b5e5b14c.js => page-748a83a8e772a56b.js} | 2 +- ...be58b9d19c.js => page-884a15d08f8be397.js} | 2 +- ...d7217f38ce.js => page-a952da77e0730c7c.js} | 2 +- ...b53edf.js => main-app-096338c8e1915716.js} | 2 +- ...0c46e0b.js => webpack-b9c71b6f9761a436.js} | 2 +- ...56a1984d35914.css => ea3759ed931c00b2.css} | 2 +- .../static/media/05a31a2ca4975f99-s.woff2 | Bin 0 -> 10496 bytes .../static/media/26a46d62cd723877-s.woff2 | Bin 18820 -> 0 bytes .../static/media/513657b02c5c193f-s.woff2 | Bin 0 -> 17612 bytes .../static/media/51ed15f9841b9f9d-s.woff2 | Bin 0 -> 22524 bytes .../static/media/55c55f0601d81cf3-s.woff2 | Bin 25908 -> 0 bytes .../static/media/581909926a08bbc8-s.woff2 | Bin 19072 -> 0 bytes .../static/media/6d93bde91c0c2823-s.woff2 | Bin 74316 -> 0 bytes .../static/media/97e0cb1ae144a2a9-s.woff2 | Bin 11220 -> 0 bytes .../static/media/a34f9d1faa5f3315-s.p.woff2 | Bin 48556 -> 0 bytes .../static/media/c9a5bc6a7c948fb0-s.p.woff2 | Bin 0 -> 46552 bytes .../static/media/d6b16ce4a6175f26-s.woff2 | Bin 0 -> 80044 bytes .../static/media/df0a9ae256c0569c-s.woff2 | Bin 10280 -> 0 bytes .../static/media/ec159349637c90ad-s.woff2 | Bin 0 -> 27316 bytes .../static/media/fd4db3eb5472fc27-s.woff2 | Bin 0 -> 12768 bytes .../_buildManifest.js | 0 .../_ssgManifest.js | 0 litellm/proxy/_experimental/out/index.html | 2 +- litellm/proxy/_experimental/out/index.txt | 4 +- .../proxy/_experimental/out/model_hub.html | 1 - litellm/proxy/_experimental/out/model_hub.txt | 4 +- .../proxy/_experimental/out/onboarding.html | 1 - .../proxy/_experimental/out/onboarding.txt | 4 +- litellm/proxy/_new_secret_config.yaml | 22 +- litellm/proxy/_types.py | 8 + litellm/proxy/auth/auth_checks.py | 54 ++- litellm/proxy/auth/user_api_key_auth.py | 53 +++ 
.../proxy/common_utils/http_parsing_utils.py | 40 ++- .../guardrail_hooks/bedrock_guardrails.py | 4 +- litellm/proxy/hooks/proxy_failure_handler.py | 87 +++++ litellm/proxy/litellm_pre_call_utils.py | 46 ++- .../internal_user_endpoints.py | 105 +++--- .../key_management_endpoints.py | 126 +++++-- .../management_endpoints/team_endpoints.py | 3 +- litellm/proxy/proxy_config.yaml | 2 +- litellm/proxy/proxy_server.py | 82 +---- litellm/proxy/route_llm_request.py | 4 +- litellm/proxy/utils.py | 10 +- litellm/router.py | 324 +++++++++++++++--- .../router_utils/pattern_match_deployments.py | 18 +- litellm/router_utils/response_headers.py | 0 litellm/router_utils/retry_utils.py | 50 +-- litellm/tests/test_mlflow.py | 29 -- litellm/types/router.py | 14 +- litellm/types/utils.py | 2 + litellm/utils.py | 2 + model_prices_and_context_window.json | 66 +++- pyproject.toml | 4 +- requirements.txt | 2 +- tests/documentation_tests/test_env_keys.py | 10 +- .../test_router_settings.py | 87 +++++ tests/llm_translation/Readme.md | 4 +- tests/llm_translation/base_llm_unit_tests.py | 11 +- .../test_anthropic_completion.py | 65 ++++ tests/llm_translation/test_azure_ai.py | 113 +++--- .../test_bedrock_completion.py | 13 + .../test_max_completion_tokens.py | 105 +++--- tests/llm_translation/test_nvidia_nim.py | 131 +++---- ...nai_prediction_param.py => test_openai.py} | 153 ++++++--- tests/llm_translation/test_openai_o1.py | 110 +++--- tests/llm_translation/test_supports_vision.py | 94 ----- .../test_text_completion_unit_tests.py | 94 ++--- tests/llm_translation/test_vertex.py | 15 + tests/local_testing/test_auth_checks.py | 104 ++++++ tests/local_testing/test_azure_openai.py | 2 +- tests/local_testing/test_azure_perf.py | 232 ++++++------- tests/local_testing/test_exceptions.py | 4 +- tests/local_testing/test_get_model_info.py | 14 + .../local_testing/test_http_parsing_utils.py | 79 +++++ tests/local_testing/test_router.py | 8 +- tests/local_testing/test_router_init.py | 2 +- tests/local_testing/test_router_utils.py | 182 ++++++++++ .../local_testing/test_tpm_rpm_routing_v2.py | 2 +- tests/local_testing/test_user_api_key_auth.py | 15 + tests/logging_callback_tests/test_datadog.py | 44 +++ tests/otel_tests/test_guardrails.py | 2 +- tests/otel_tests/test_moderations.py | 71 ++++ .../test_key_management.py | 44 +++ .../test_key_generate_prisma.py | 228 +++++++++++- tests/proxy_unit_tests/test_proxy_server.py | 123 +++++++ tests/proxy_unit_tests/test_proxy_utils.py | 111 +++++- .../test_unit_test_proxy_hooks.py | 111 ++++++ .../test_router_endpoints.py | 2 +- .../test_router_helper_utils.py | 20 +- tests/test_keys.py | 1 + tests/test_spend_logs.py | 3 +- ui/litellm-dashboard/out/404.html | 1 - .../WeMIGILYzOYN-R9DXbvCD/_buildManifest.js | 1 - .../WeMIGILYzOYN-R9DXbvCD/_ssgManifest.js | 1 - .../static/chunks/131-3d2257b0ff5aadb2.js | 8 - .../chunks/2f6dbc85-cac2949a76539886.js | 1 - .../chunks/3014691f-b24e8254c7593934.js | 1 - .../static/chunks/626-4e8df4039ecf4386.js | 13 - .../static/chunks/684-16b194c83a169f6d.js | 1 - .../static/chunks/69-8316d07d1f41e39f.js | 1 - .../static/chunks/777-9d9df0b75010dbf9.js | 1 - .../static/chunks/902-58bf23027703b2e8.js | 13 - .../chunks/app/_not-found-4163791cb6a88df1.js | 1 - .../chunks/app/layout-61827b157521da1b.js | 1 - .../app/model_hub/page-104cada6b5e5b14c.js | 1 - .../app/onboarding/page-bad6cfbe58b9d19c.js | 1 - .../chunks/app/page-68b04cd7217f38ce.js | 1 - .../chunks/fd9d1056-f593049e31b05aeb.js | 1 - .../chunks/framework-b370f160bb96059c.js | 33 -- 
.../static/chunks/main-a61244f130fbf565.js | 1 - .../chunks/main-app-9b4fb13a7db53edf.js | 1 - .../chunks/pages/_app-d21e88acd55d90f1.js | 1 - .../chunks/pages/_error-d6107f1aac0c574c.js | 1 - .../chunks/polyfills-c67a75d1b6f99dc8.js | 1 - .../static/chunks/webpack-e8ad0a25b0c46e0b.js | 1 - .../out/_next/static/css/00256a1984d35914.css | 5 - .../static/media/26a46d62cd723877-s.woff2 | Bin 18820 -> 0 bytes .../static/media/55c55f0601d81cf3-s.woff2 | Bin 25908 -> 0 bytes .../static/media/581909926a08bbc8-s.woff2 | Bin 19072 -> 0 bytes .../static/media/6d93bde91c0c2823-s.woff2 | Bin 74316 -> 0 bytes .../static/media/97e0cb1ae144a2a9-s.woff2 | Bin 11220 -> 0 bytes .../static/media/a34f9d1faa5f3315-s.p.woff2 | Bin 48556 -> 0 bytes .../static/media/df0a9ae256c0569c-s.woff2 | Bin 10280 -> 0 bytes ui/litellm-dashboard/out/favicon.ico | Bin 15406 -> 0 bytes ui/litellm-dashboard/out/index.html | 1 - ui/litellm-dashboard/out/index.txt | 7 - ui/litellm-dashboard/out/model_hub.html | 1 - ui/litellm-dashboard/out/model_hub.txt | 7 - ui/litellm-dashboard/out/next.svg | 1 - ui/litellm-dashboard/out/onboarding.html | 1 - ui/litellm-dashboard/out/onboarding.txt | 7 - ui/litellm-dashboard/out/vercel.svg | 1 - .../src/components/view_key_table.tsx | 12 + 160 files changed, 3492 insertions(+), 1209 deletions(-) create mode 100644 docs/my-website/docs/moderation.md create mode 100644 docs/my-website/docs/text_completion.md create mode 100644 docs/my-website/docs/wildcard_routing.md delete mode 100644 litellm/proxy/_experimental/out/404.html delete mode 100644 litellm/proxy/_experimental/out/_next/static/chunks/131-3d2257b0ff5aadb2.js create mode 100644 litellm/proxy/_experimental/out/_next/static/chunks/131-4ee1d633e8928742.js create mode 100644 litellm/proxy/_experimental/out/_next/static/chunks/626-0c564a21577c9c53.js delete mode 100644 litellm/proxy/_experimental/out/_next/static/chunks/626-4e8df4039ecf4386.js rename litellm/proxy/_experimental/out/_next/static/chunks/{902-58bf23027703b2e8.js => 902-292bb6a83427dbc7.js} (100%) create mode 100644 litellm/proxy/_experimental/out/_next/static/chunks/app/layout-05e5448bd170dbcb.js delete mode 100644 litellm/proxy/_experimental/out/_next/static/chunks/app/layout-61827b157521da1b.js rename litellm/proxy/_experimental/out/_next/static/chunks/app/model_hub/{page-104cada6b5e5b14c.js => page-748a83a8e772a56b.js} (97%) rename litellm/proxy/_experimental/out/_next/static/chunks/app/onboarding/{page-bad6cfbe58b9d19c.js => page-884a15d08f8be397.js} (94%) rename litellm/proxy/_experimental/out/_next/static/chunks/app/{page-68b04cd7217f38ce.js => page-a952da77e0730c7c.js} (51%) rename litellm/proxy/_experimental/out/_next/static/chunks/{main-app-9b4fb13a7db53edf.js => main-app-096338c8e1915716.js} (54%) rename litellm/proxy/_experimental/out/_next/static/chunks/{webpack-e8ad0a25b0c46e0b.js => webpack-b9c71b6f9761a436.js} (98%) rename litellm/proxy/_experimental/out/_next/static/css/{00256a1984d35914.css => ea3759ed931c00b2.css} (99%) create mode 100644 litellm/proxy/_experimental/out/_next/static/media/05a31a2ca4975f99-s.woff2 delete mode 100644 litellm/proxy/_experimental/out/_next/static/media/26a46d62cd723877-s.woff2 create mode 100644 litellm/proxy/_experimental/out/_next/static/media/513657b02c5c193f-s.woff2 create mode 100644 litellm/proxy/_experimental/out/_next/static/media/51ed15f9841b9f9d-s.woff2 delete mode 100644 litellm/proxy/_experimental/out/_next/static/media/55c55f0601d81cf3-s.woff2 delete mode 100644 
litellm/proxy/_experimental/out/_next/static/media/581909926a08bbc8-s.woff2 delete mode 100644 litellm/proxy/_experimental/out/_next/static/media/6d93bde91c0c2823-s.woff2 delete mode 100644 litellm/proxy/_experimental/out/_next/static/media/97e0cb1ae144a2a9-s.woff2 delete mode 100644 litellm/proxy/_experimental/out/_next/static/media/a34f9d1faa5f3315-s.p.woff2 create mode 100644 litellm/proxy/_experimental/out/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2 create mode 100644 litellm/proxy/_experimental/out/_next/static/media/d6b16ce4a6175f26-s.woff2 delete mode 100644 litellm/proxy/_experimental/out/_next/static/media/df0a9ae256c0569c-s.woff2 create mode 100644 litellm/proxy/_experimental/out/_next/static/media/ec159349637c90ad-s.woff2 create mode 100644 litellm/proxy/_experimental/out/_next/static/media/fd4db3eb5472fc27-s.woff2 rename litellm/proxy/_experimental/out/_next/static/{WeMIGILYzOYN-R9DXbvCD => pDx3dChtj-paUmJExuV6u}/_buildManifest.js (100%) rename litellm/proxy/_experimental/out/_next/static/{WeMIGILYzOYN-R9DXbvCD => pDx3dChtj-paUmJExuV6u}/_ssgManifest.js (100%) delete mode 100644 litellm/proxy/_experimental/out/model_hub.html delete mode 100644 litellm/proxy/_experimental/out/onboarding.html create mode 100644 litellm/proxy/hooks/proxy_failure_handler.py create mode 100644 litellm/router_utils/response_headers.py delete mode 100644 litellm/tests/test_mlflow.py create mode 100644 tests/documentation_tests/test_router_settings.py rename tests/llm_translation/{test_openai_prediction_param.py => test_openai.py} (54%) delete mode 100644 tests/llm_translation/test_supports_vision.py create mode 100644 tests/local_testing/test_http_parsing_utils.py create mode 100644 tests/otel_tests/test_moderations.py create mode 100644 tests/proxy_unit_tests/test_unit_test_proxy_hooks.py delete mode 100644 ui/litellm-dashboard/out/404.html delete mode 100644 ui/litellm-dashboard/out/_next/static/WeMIGILYzOYN-R9DXbvCD/_buildManifest.js delete mode 100644 ui/litellm-dashboard/out/_next/static/WeMIGILYzOYN-R9DXbvCD/_ssgManifest.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/131-3d2257b0ff5aadb2.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/2f6dbc85-cac2949a76539886.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/3014691f-b24e8254c7593934.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/626-4e8df4039ecf4386.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/684-16b194c83a169f6d.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/69-8316d07d1f41e39f.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/777-9d9df0b75010dbf9.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/902-58bf23027703b2e8.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/app/_not-found-4163791cb6a88df1.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/app/layout-61827b157521da1b.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/app/model_hub/page-104cada6b5e5b14c.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/app/onboarding/page-bad6cfbe58b9d19c.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/app/page-68b04cd7217f38ce.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/fd9d1056-f593049e31b05aeb.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/framework-b370f160bb96059c.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/main-a61244f130fbf565.js delete mode 
100644 ui/litellm-dashboard/out/_next/static/chunks/main-app-9b4fb13a7db53edf.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/pages/_app-d21e88acd55d90f1.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/pages/_error-d6107f1aac0c574c.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js delete mode 100644 ui/litellm-dashboard/out/_next/static/chunks/webpack-e8ad0a25b0c46e0b.js delete mode 100644 ui/litellm-dashboard/out/_next/static/css/00256a1984d35914.css delete mode 100644 ui/litellm-dashboard/out/_next/static/media/26a46d62cd723877-s.woff2 delete mode 100644 ui/litellm-dashboard/out/_next/static/media/55c55f0601d81cf3-s.woff2 delete mode 100644 ui/litellm-dashboard/out/_next/static/media/581909926a08bbc8-s.woff2 delete mode 100644 ui/litellm-dashboard/out/_next/static/media/6d93bde91c0c2823-s.woff2 delete mode 100644 ui/litellm-dashboard/out/_next/static/media/97e0cb1ae144a2a9-s.woff2 delete mode 100644 ui/litellm-dashboard/out/_next/static/media/a34f9d1faa5f3315-s.p.woff2 delete mode 100644 ui/litellm-dashboard/out/_next/static/media/df0a9ae256c0569c-s.woff2 delete mode 100644 ui/litellm-dashboard/out/favicon.ico delete mode 100644 ui/litellm-dashboard/out/index.html delete mode 100644 ui/litellm-dashboard/out/index.txt delete mode 100644 ui/litellm-dashboard/out/model_hub.html delete mode 100644 ui/litellm-dashboard/out/model_hub.txt delete mode 100644 ui/litellm-dashboard/out/next.svg delete mode 100644 ui/litellm-dashboard/out/onboarding.html delete mode 100644 ui/litellm-dashboard/out/onboarding.txt delete mode 100644 ui/litellm-dashboard/out/vercel.svg diff --git a/.circleci/config.yml b/.circleci/config.yml index fbf3cb867d8d..059742d51bb5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -811,7 +811,8 @@ jobs: - run: python ./tests/code_coverage_tests/router_code_coverage.py - run: python ./tests/code_coverage_tests/test_router_strategy_async.py - run: python ./tests/code_coverage_tests/litellm_logging_code_coverage.py - # - run: python ./tests/documentation_tests/test_env_keys.py + - run: python ./tests/documentation_tests/test_env_keys.py + - run: python ./tests/documentation_tests/test_router_settings.py - run: python ./tests/documentation_tests/test_api_docs.py - run: python ./tests/code_coverage_tests/ensure_async_clients_test.py - run: helm lint ./deploy/charts/litellm-helm diff --git a/docs/my-website/docs/moderation.md b/docs/my-website/docs/moderation.md new file mode 100644 index 000000000000..6dd092fb52ac --- /dev/null +++ b/docs/my-website/docs/moderation.md @@ -0,0 +1,135 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Moderation + + +### Usage + + + +```python +from litellm import moderation + +response = moderation( + input="hello from litellm", + model="text-moderation-stable" +) +``` + + + + +For `/moderations` endpoint, there is **no need to specify `model` in the request or on the litellm config.yaml** + +Start litellm proxy server + +``` +litellm +``` + + + + + +```python +from openai import OpenAI + +# set base_url to your proxy server +# set api_key to send to proxy server +client = OpenAI(api_key="", base_url="http://0.0.0.0:4000") + +response = client.moderations.create( + input="hello from litellm", + model="text-moderation-stable" # optional, defaults to `omni-moderation-latest` +) + +print(response) +``` + + + + +```shell +curl --location 'http://0.0.0.0:4000/moderations' \ + --header 'Content-Type: application/json' \ + 
--header 'Authorization: Bearer sk-1234' \ + --data '{"input": "Sample text goes here", "model": "text-moderation-stable"}' +``` + + + + + + +## Input Params +LiteLLM accepts and translates the [OpenAI Moderation params](https://platform.openai.com/docs/api-reference/moderations) across all supported providers. + +### Required Fields + +- `input`: *string or array* - Input (or inputs) to classify. Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models. + - If string: A string of text to classify for moderation + - If array of strings: An array of strings to classify for moderation + - If array of objects: An array of multi-modal inputs to the moderation model, where each object can be: + - An object describing an image to classify with: + - `type`: *string, required* - Always `image_url` + - `image_url`: *object, required* - Contains either an image URL or a data URL for a base64 encoded image + - An object describing text to classify with: + - `type`: *string, required* - Always `text` + - `text`: *string, required* - A string of text to classify + +### Optional Fields + +- `model`: *string (optional)* - The moderation model to use. Defaults to `omni-moderation-latest`. + +## Output Format +Here's the exact json output and type you can expect from all moderation calls: + +[**LiteLLM follows OpenAI's output format**](https://platform.openai.com/docs/api-reference/moderations/object) + + +```python +{ + "id": "modr-AB8CjOTu2jiq12hp1AQPfeqFWaORR", + "model": "text-moderation-007", + "results": [ + { + "flagged": true, + "categories": { + "sexual": false, + "hate": false, + "harassment": true, + "self-harm": false, + "sexual/minors": false, + "hate/threatening": false, + "violence/graphic": false, + "self-harm/intent": false, + "self-harm/instructions": false, + "harassment/threatening": true, + "violence": true + }, + "category_scores": { + "sexual": 0.000011726012417057063, + "hate": 0.22706663608551025, + "harassment": 0.5215635299682617, + "self-harm": 2.227119921371923e-6, + "sexual/minors": 7.107352217872176e-8, + "hate/threatening": 0.023547329008579254, + "violence/graphic": 0.00003391829886822961, + "self-harm/intent": 1.646940972932498e-6, + "self-harm/instructions": 1.1198755256458526e-9, + "harassment/threatening": 0.5694745779037476, + "violence": 0.9971134662628174 + } + } + ] +} + +``` + + +## **Supported Providers** + +| Provider | +|-------------| +| OpenAI | diff --git a/docs/my-website/docs/observability/argilla.md b/docs/my-website/docs/observability/argilla.md index 8d20b9daab66..dad28ce90c88 100644 --- a/docs/my-website/docs/observability/argilla.md +++ b/docs/my-website/docs/observability/argilla.md @@ -4,24 +4,63 @@ import TabItem from '@theme/TabItem'; # Argilla -Argilla is a tool for annotating datasets. +Argilla is a collaborative annotation tool for AI engineers and domain experts who need to build high-quality datasets for their projects. +## Getting Started -## Usage +To log the data to Argilla, first you need to deploy the Argilla server. If you have not deployed the Argilla server, please follow the instructions [here](https://docs.argilla.io/latest/getting_started/quickstart/). + +Next, you will need to configure and create the Argilla dataset. 
+ +```python +import argilla as rg + +client = rg.Argilla(api_url="", api_key="") + +settings = rg.Settings( + guidelines="These are some guidelines.", + fields=[ + rg.ChatField( + name="user_input", + ), + rg.TextField( + name="llm_output", + ), + ], + questions=[ + rg.RatingQuestion( + name="rating", + values=[1, 2, 3, 4, 5, 6, 7], + ), + ], +) + +dataset = rg.Dataset( + name="my_first_dataset", + settings=settings, +) + +dataset.create() +``` + +For further configuration, please refer to the [Argilla documentation](https://docs.argilla.io/latest/how_to_guides/dataset/). + + +## Usage ```python -from litellm import completion +import os import litellm -import os +from litellm import completion # add env vars os.environ["ARGILLA_API_KEY"]="argilla.apikey" os.environ["ARGILLA_BASE_URL"]="http://localhost:6900" -os.environ["ARGILLA_DATASET_NAME"]="my_second_dataset" +os.environ["ARGILLA_DATASET_NAME"]="my_first_dataset" os.environ["OPENAI_API_KEY"]="sk-proj-..." litellm.callbacks = ["argilla"] diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md index 91deba95868a..c762a0716c05 100644 --- a/docs/my-website/docs/proxy/config_settings.md +++ b/docs/my-website/docs/proxy/config_settings.md @@ -279,7 +279,31 @@ router_settings: | retry_policy | object | Specifies the number of retries for different types of exceptions. [More information here](reliability) | | allowed_fails | integer | The number of failures allowed before cooling down a model. [More information here](reliability) | | allowed_fails_policy | object | Specifies the number of allowed failures for different error types before cooling down a deployment. [More information here](reliability) | - +| default_max_parallel_requests | Optional[int] | The default maximum number of parallel requests for a deployment. | +| default_priority | (Optional[int]) | The default priority for a request. Only for '.scheduler_acompletion()'. Default is None. | +| polling_interval | (Optional[float]) | frequency of polling queue. Only for '.scheduler_acompletion()'. Default is 3ms. | +| max_fallbacks | Optional[int] | The maximum number of fallbacks to try before exiting the call. Defaults to 5. | +| default_litellm_params | Optional[dict] | The default litellm parameters to add to all requests (e.g. `temperature`, `max_tokens`). | +| timeout | Optional[float] | The default timeout for a request. | +| debug_level | Literal["DEBUG", "INFO"] | The debug level for the logging library in the router. Defaults to "INFO". | +| client_ttl | int | Time-to-live for cached clients in seconds. Defaults to 3600. | +| cache_kwargs | dict | Additional keyword arguments for the cache initialization. | +| routing_strategy_args | dict | Additional keyword arguments for the routing strategy - e.g. lowest latency routing default ttl | +| model_group_alias | dict | Model group alias mapping. E.g. `{"claude-3-haiku": "claude-3-haiku-20240229"}` | +| num_retries | int | Number of retries for a request. Defaults to 3. | +| default_fallbacks | Optional[List[str]] | Fallbacks to try if no model group-specific fallbacks are defined. | +| caching_groups | Optional[List[tuple]] | List of model groups for caching across model groups. Defaults to None. - e.g. caching_groups=[("openai-gpt-3.5-turbo", "azure-gpt-3.5-turbo")]| +| alerting_config | AlertingConfig | [SDK-only arg] Slack alerting configuration. Defaults to None. 
[Further Docs](../routing.md#alerting-) | +| assistants_config | AssistantsConfig | Set on proxy via `assistant_settings`. [Further docs](../assistants.md) | +| set_verbose | boolean | [DEPRECATED PARAM - see debug docs](./debugging.md) If true, sets the logging level to verbose. | +| retry_after | int | Time to wait before retrying a request in seconds. Defaults to 0. If `x-retry-after` is received from LLM API, this value is overridden. | +| provider_budget_config | ProviderBudgetConfig | Provider budget configuration. Use this to set llm_provider budget limits. example $100/day to OpenAI, $100/day to Azure, etc. Defaults to None. [Further Docs](./provider_budget_routing.md) | +| enable_pre_call_checks | boolean | If true, checks if a call is within the model's context window before making the call. [More information here](reliability) | +| model_group_retry_policy | Dict[str, RetryPolicy] | [SDK-only arg] Set retry policy for model groups. | +| context_window_fallbacks | List[Dict[str, List[str]]] | Fallback models for context window violations. | +| redis_url | str | URL for Redis server. **Known performance issue with Redis URL.** | +| cache_responses | boolean | Flag to enable caching LLM Responses, if cache set under `router_settings`. If true, caches responses. Defaults to False. | +| router_general_settings | RouterGeneralSettings | [SDK-Only] Router general settings - contains optimizations like 'async_only_mode'. [Docs](../routing.md#router-general-settings) | ### environment variables - Reference @@ -335,6 +359,8 @@ router_settings: | DD_SITE | Site URL for Datadog (e.g., datadoghq.com) | DD_SOURCE | Source identifier for Datadog logs | DD_ENV | Environment identifier for Datadog logs. Only supported for `datadog_llm_observability` callback +| DD_SERVICE | Service identifier for Datadog logs. Defaults to "litellm-server" +| DD_VERSION | Version identifier for Datadog logs. 
Defaults to "unknown" | DEBUG_OTEL | Enable debug mode for OpenTelemetry | DIRECT_URL | Direct URL for service endpoint | DISABLE_ADMIN_UI | Toggle to disable the admin UI diff --git a/docs/my-website/docs/proxy/configs.md b/docs/my-website/docs/proxy/configs.md index ccb9872d6787..7876c9dec167 100644 --- a/docs/my-website/docs/proxy/configs.md +++ b/docs/my-website/docs/proxy/configs.md @@ -357,77 +357,6 @@ curl --location 'http://0.0.0.0:4000/v1/model/info' \ --data '' ``` - -### Provider specific wildcard routing -**Proxy all models from a provider** - -Use this if you want to **proxy all models from a specific provider without defining them on the config.yaml** - -**Step 1** - define provider specific routing on config.yaml -```yaml -model_list: - # provider specific wildcard routing - - model_name: "anthropic/*" - litellm_params: - model: "anthropic/*" - api_key: os.environ/ANTHROPIC_API_KEY - - model_name: "groq/*" - litellm_params: - model: "groq/*" - api_key: os.environ/GROQ_API_KEY - - model_name: "fo::*:static::*" # all requests matching this pattern will be routed to this deployment, example: model="fo::hi::static::hi" will be routed to deployment: "openai/fo::*:static::*" - litellm_params: - model: "openai/fo::*:static::*" - api_key: os.environ/OPENAI_API_KEY -``` - -Step 2 - Run litellm proxy - -```shell -$ litellm --config /path/to/config.yaml -``` - -Step 3 Test it - -Test with `anthropic/` - all models with `anthropic/` prefix will get routed to `anthropic/*` -```shell -curl http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-1234" \ - -d '{ - "model": "anthropic/claude-3-sonnet-20240229", - "messages": [ - {"role": "user", "content": "Hello, Claude!"} - ] - }' -``` - -Test with `groq/` - all models with `groq/` prefix will get routed to `groq/*` -```shell -curl http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-1234" \ - -d '{ - "model": "groq/llama3-8b-8192", - "messages": [ - {"role": "user", "content": "Hello, Claude!"} - ] - }' -``` - -Test with `fo::*::static::*` - all requests matching this pattern will be routed to `openai/fo::*:static::*` -```shell -curl http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-1234" \ - -d '{ - "model": "fo::hi::static::hi", - "messages": [ - {"role": "user", "content": "Hello, Claude!"} - ] - }' -``` - ### Load Balancing :::info diff --git a/docs/my-website/docs/proxy/db_info.md b/docs/my-website/docs/proxy/db_info.md index 6e6a48bd1105..8429f636034c 100644 --- a/docs/my-website/docs/proxy/db_info.md +++ b/docs/my-website/docs/proxy/db_info.md @@ -50,18 +50,22 @@ You can see the full DB Schema [here](https://github.com/BerriAI/litellm/blob/ma | LiteLLM_ErrorLogs | Captures failed requests and errors. Stores exception details and request information. Helps with debugging and monitoring. | **Medium - on errors only** | | LiteLLM_AuditLog | Tracks changes to system configuration. Records who made changes and what was modified. Maintains history of updates to teams, users, and models. | **Off by default**, **High - when enabled** | -## How to Disable `LiteLLM_SpendLogs` +## Disable `LiteLLM_SpendLogs` & `LiteLLM_ErrorLogs` -You can disable spend_logs by setting `disable_spend_logs` to `True` on the `general_settings` section of your proxy_config.yaml file. 
+You can disable spend_logs and error_logs by setting `disable_spend_logs` and `disable_error_logs` to `True` on the `general_settings` section of your proxy_config.yaml file. ```yaml general_settings: - disable_spend_logs: True + disable_spend_logs: True # Disable writing spend logs to DB + disable_error_logs: True # Disable writing error logs to DB ``` +### What is the impact of disabling these logs? -### What is the impact of disabling `LiteLLM_SpendLogs`? - +When disabling spend logs (`disable_spend_logs: True`): - You **will not** be able to view Usage on the LiteLLM UI - You **will** continue seeing cost metrics on s3, Prometheus, Langfuse (any other Logging integration you are using) +When disabling error logs (`disable_error_logs: True`): +- You **will not** be able to view Errors on the LiteLLM UI +- You **will** continue seeing error logs in your application logs and any other logging integrations you are using diff --git a/docs/my-website/docs/proxy/prod.md b/docs/my-website/docs/proxy/prod.md index 32a6fceeeb72..9dacedaabc84 100644 --- a/docs/my-website/docs/proxy/prod.md +++ b/docs/my-website/docs/proxy/prod.md @@ -23,6 +23,7 @@ general_settings: # OPTIONAL Best Practices disable_spend_logs: True # turn off writing each transaction to the db. We recommend doing this is you don't need to see Usage on the LiteLLM UI and are tracking metrics via Prometheus + disable_error_logs: True # turn off writing LLM Exceptions to DB allow_requests_on_db_unavailable: True # Only USE when running LiteLLM on your VPC. Allow requests to still be processed even if the DB is unavailable. We recommend doing this if you're running LiteLLM on VPC that cannot be accessed from the public internet. litellm_settings: @@ -102,17 +103,22 @@ general_settings: allow_requests_on_db_unavailable: True ``` -## 6. Disable spend_logs if you're not using the LiteLLM UI +## 6. Disable spend_logs & error_logs if not using the LiteLLM UI -By default LiteLLM will write every request to the `LiteLLM_SpendLogs` table. This is used for viewing Usage on the LiteLLM UI. +By default, LiteLLM writes several types of logs to the database: +- Every LLM API request to the `LiteLLM_SpendLogs` table +- LLM Exceptions to the `LiteLLM_LogsErrors` table -If you're not viewing Usage on the LiteLLM UI (most users use Prometheus when this is disabled), you can disable spend_logs by setting `disable_spend_logs` to `True`. +If you're not viewing these logs on the LiteLLM UI (most users use Prometheus for monitoring), you can disable them by setting the following flags to `True`: ```yaml general_settings: - disable_spend_logs: True + disable_spend_logs: True # Disable writing spend logs to DB + disable_error_logs: True # Disable writing error logs to DB ``` +[More information about what the Database is used for here](db_info) + ## 7. Use Helm PreSync Hook for Database Migrations [BETA] To ensure only one service manages database migrations, use our [Helm PreSync hook for Database Migrations](https://github.com/BerriAI/litellm/blob/main/deploy/charts/litellm-helm/templates/migrations-job.yaml). This ensures migrations are handled during `helm upgrade` or `helm install`, while LiteLLM pods explicitly disable migrations. 
diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md index 58dc3dae3681..f19101b36d58 100644 --- a/docs/my-website/docs/proxy/prometheus.md +++ b/docs/my-website/docs/proxy/prometheus.md @@ -192,3 +192,13 @@ Here is a screenshot of the metrics you can monitor with the LiteLLM Grafana Das |----------------------|--------------------------------------| | `litellm_llm_api_failed_requests_metric` | **deprecated** use `litellm_proxy_failed_requests_metric` | | `litellm_requests_metric` | **deprecated** use `litellm_proxy_total_requests_metric` | + + +## FAQ + +### What are `_created` vs. `_total` metrics? + +- `_created` metrics are metrics that are created when the proxy starts +- `_total` metrics are metrics that are incremented for each request + +You should consume the `_total` metrics for your counting purposes \ No newline at end of file diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md index 702cafa7f62b..87fad7437e47 100644 --- a/docs/my-website/docs/routing.md +++ b/docs/my-website/docs/routing.md @@ -1891,3 +1891,22 @@ router = Router( debug_level="DEBUG" # defaults to INFO ) ``` + +## Router General Settings + +### Usage + +```python +router = Router(model_list=..., router_general_settings=RouterGeneralSettings(async_only_mode=True)) +``` + +### Spec +```python +class RouterGeneralSettings(BaseModel): + async_only_mode: bool = Field( + default=False + ) # this will only initialize async clients. Good for memory utils + pass_through_all_models: bool = Field( + default=False + ) # if passed a model not llm_router model list, pass through the request to litellm.acompletion/embedding +``` \ No newline at end of file diff --git a/docs/my-website/docs/text_completion.md b/docs/my-website/docs/text_completion.md new file mode 100644 index 000000000000..8be40dfdcd27 --- /dev/null +++ b/docs/my-website/docs/text_completion.md @@ -0,0 +1,174 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Text Completion + +### Usage + + + +```python +from litellm import text_completion + +response = text_completion( + model="gpt-3.5-turbo-instruct", + prompt="Say this is a test", + max_tokens=7 +) +``` + + + + +1. Define models on config.yaml + +```yaml +model_list: + - model_name: gpt-3.5-turbo-instruct + litellm_params: + model: text-completion-openai/gpt-3.5-turbo-instruct # The `text-completion-openai/` prefix will call openai.completions.create + api_key: os.environ/OPENAI_API_KEY + - model_name: text-davinci-003 + litellm_params: + model: text-completion-openai/text-davinci-003 + api_key: os.environ/OPENAI_API_KEY +``` + +2. Start litellm proxy server + +``` +litellm --config config.yaml +``` + + + + +```python +from openai import OpenAI + +# set base_url to your proxy server +# set api_key to send to proxy server +client = OpenAI(api_key="", base_url="http://0.0.0.0:4000") + +response = client.completions.create( + model="gpt-3.5-turbo-instruct", + prompt="Say this is a test", + max_tokens=7 +) + +print(response) +``` + + + + +```shell +curl --location 'http://0.0.0.0:4000/completions' \ + --header 'Content-Type: application/json' \ + --header 'Authorization: Bearer sk-1234' \ + --data '{ + "model": "gpt-3.5-turbo-instruct", + "prompt": "Say this is a test", + "max_tokens": 7 + }' +``` + + + + + + +## Input Params + +LiteLLM accepts and translates the [OpenAI Text Completion params](https://platform.openai.com/docs/api-reference/completions) across all supported providers. 
+ +### Required Fields + +- `model`: *string* - ID of the model to use +- `prompt`: *string or array* - The prompt(s) to generate completions for + +### Optional Fields + +- `best_of`: *integer* - Generates best_of completions server-side and returns the "best" one +- `echo`: *boolean* - Echo back the prompt in addition to the completion. +- `frequency_penalty`: *number* - Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency. +- `logit_bias`: *map* - Modify the likelihood of specified tokens appearing in the completion +- `logprobs`: *integer* - Include the log probabilities on the logprobs most likely tokens. Max value of 5 +- `max_tokens`: *integer* - The maximum number of tokens to generate. +- `n`: *integer* - How many completions to generate for each prompt. +- `presence_penalty`: *number* - Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far. +- `seed`: *integer* - If specified, system will attempt to make deterministic samples +- `stop`: *string or array* - Up to 4 sequences where the API will stop generating tokens +- `stream`: *boolean* - Whether to stream back partial progress. Defaults to false +- `suffix`: *string* - The suffix that comes after a completion of inserted text +- `temperature`: *number* - What sampling temperature to use, between 0 and 2. +- `top_p`: *number* - An alternative to sampling with temperature, called nucleus sampling. +- `user`: *string* - A unique identifier representing your end-user + +## Output Format +Here's the exact JSON output format you can expect from completion calls: + + +[**Follows OpenAI's output format**](https://platform.openai.com/docs/api-reference/completions/object) + + + + + +```python +{ + "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7", + "object": "text_completion", + "created": 1589478378, + "model": "gpt-3.5-turbo-instruct", + "system_fingerprint": "fp_44709d6fcb", + "choices": [ + { + "text": "\n\nThis is indeed a test", + "index": 0, + "logprobs": null, + "finish_reason": "length" + } + ], + "usage": { + "prompt_tokens": 5, + "completion_tokens": 7, + "total_tokens": 12 + } +} + +``` + + + +```python +{ + "id": "cmpl-7iA7iJjj8V2zOkCGvWF2hAkDWBQZe", + "object": "text_completion", + "created": 1690759702, + "choices": [ + { + "text": "This", + "index": 0, + "logprobs": null, + "finish_reason": null + } + ], + "model": "gpt-3.5-turbo-instruct" + "system_fingerprint": "fp_44709d6fcb", +} + +``` + + + + + +## **Supported Providers** + +| Provider | Link to Usage | +|-------------|--------------------| +| OpenAI | [Usage](../docs/providers/text_completion_openai) | +| Azure OpenAI| [Usage](../docs/providers/azure) | + + diff --git a/docs/my-website/docs/wildcard_routing.md b/docs/my-website/docs/wildcard_routing.md new file mode 100644 index 000000000000..80926d73e598 --- /dev/null +++ b/docs/my-website/docs/wildcard_routing.md @@ -0,0 +1,140 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Provider specific Wildcard routing + +**Proxy all models from a provider** + +Use this if you want to **proxy all models from a specific provider without defining them on the config.yaml** + +## Step 1. 
Define provider specific routing + + + + +```python +from litellm import Router + +router = Router( + model_list=[ + { + "model_name": "anthropic/*", + "litellm_params": { + "model": "anthropic/*", + "api_key": os.environ["ANTHROPIC_API_KEY"] + } + }, + { + "model_name": "groq/*", + "litellm_params": { + "model": "groq/*", + "api_key": os.environ["GROQ_API_KEY"] + } + }, + { + "model_name": "fo::*:static::*", # all requests matching this pattern will be routed to this deployment, example: model="fo::hi::static::hi" will be routed to deployment: "openai/fo::*:static::*" + "litellm_params": { + "model": "openai/fo::*:static::*", + "api_key": os.environ["OPENAI_API_KEY"] + } + } + ] +) +``` + + + + +**Step 1** - define provider specific routing on config.yaml +```yaml +model_list: + # provider specific wildcard routing + - model_name: "anthropic/*" + litellm_params: + model: "anthropic/*" + api_key: os.environ/ANTHROPIC_API_KEY + - model_name: "groq/*" + litellm_params: + model: "groq/*" + api_key: os.environ/GROQ_API_KEY + - model_name: "fo::*:static::*" # all requests matching this pattern will be routed to this deployment, example: model="fo::hi::static::hi" will be routed to deployment: "openai/fo::*:static::*" + litellm_params: + model: "openai/fo::*:static::*" + api_key: os.environ/OPENAI_API_KEY +``` + + + +## [PROXY-Only] Step 2 - Run litellm proxy + +```shell +$ litellm --config /path/to/config.yaml +``` + +## Step 3 - Test it + + + + +```python +from litellm import Router + +router = Router(model_list=...) + +# Test with `anthropic/` - all models with `anthropic/` prefix will get routed to `anthropic/*` +resp = completion(model="anthropic/claude-3-sonnet-20240229", messages=[{"role": "user", "content": "Hello, Claude!"}]) +print(resp) + +# Test with `groq/` - all models with `groq/` prefix will get routed to `groq/*` +resp = completion(model="groq/llama3-8b-8192", messages=[{"role": "user", "content": "Hello, Groq!"}]) +print(resp) + +# Test with `fo::*::static::*` - all requests matching this pattern will be routed to `openai/fo::*:static::*` +resp = completion(model="fo::hi::static::hi", messages=[{"role": "user", "content": "Hello, Claude!"}]) +print(resp) +``` + + + + +Test with `anthropic/` - all models with `anthropic/` prefix will get routed to `anthropic/*` +```bash +curl http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "anthropic/claude-3-sonnet-20240229", + "messages": [ + {"role": "user", "content": "Hello, Claude!"} + ] + }' +``` + +Test with `groq/` - all models with `groq/` prefix will get routed to `groq/*` +```shell +curl http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "groq/llama3-8b-8192", + "messages": [ + {"role": "user", "content": "Hello, Claude!"} + ] + }' +``` + +Test with `fo::*::static::*` - all requests matching this pattern will be routed to `openai/fo::*:static::*` +```shell +curl http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "fo::hi::static::hi", + "messages": [ + {"role": "user", "content": "Hello, Claude!"} + ] + }' +``` + + + diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 1deb0dd75f24..e6a028d83139 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -246,6 +246,7 @@ const sidebars = { "completion/usage", ], }, + 
"text_completion", "embedding/supported_embedding", "image_generation", { @@ -261,6 +262,7 @@ const sidebars = { "batches", "realtime", "fine_tuning", + "moderation", { type: "link", label: "Use LiteLLM Proxy with Vertex, Bedrock SDK", @@ -277,7 +279,7 @@ const sidebars = { description: "Learn how to load balance, route, and set fallbacks for your LLM requests", slug: "/routing-load-balancing", }, - items: ["routing", "scheduler", "proxy/load_balancing", "proxy/reliability", "proxy/tag_routing", "proxy/provider_budget_routing", "proxy/team_based_routing", "proxy/customer_routing"], + items: ["routing", "scheduler", "proxy/load_balancing", "proxy/reliability", "proxy/tag_routing", "proxy/provider_budget_routing", "proxy/team_based_routing", "proxy/customer_routing", "wildcard_routing"], }, { type: "category", diff --git a/enterprise/utils.py b/enterprise/utils.py index f0af1d676d5f..cc97661d74f6 100644 --- a/enterprise/utils.py +++ b/enterprise/utils.py @@ -2,7 +2,9 @@ from typing import Optional, List from litellm._logging import verbose_logger from litellm.proxy.proxy_server import PrismaClient, HTTPException +from litellm.llms.custom_httpx.http_handler import HTTPHandler import collections +import httpx from datetime import datetime @@ -114,7 +116,6 @@ async def ui_get_spend_by_tags( def _forecast_daily_cost(data: list): - import requests # type: ignore from datetime import datetime, timedelta if len(data) == 0: @@ -136,17 +137,17 @@ def _forecast_daily_cost(data: list): print("last entry date", last_entry_date) - # Assuming today_date is a datetime object - today_date = datetime.now() - # Calculate the last day of the month last_day_of_todays_month = datetime( today_date.year, today_date.month % 12 + 1, 1 ) - timedelta(days=1) + print("last day of todays month", last_day_of_todays_month) # Calculate the remaining days in the month remaining_days = (last_day_of_todays_month - last_entry_date).days + print("remaining days", remaining_days) + current_spend_this_month = 0 series = {} for entry in data: @@ -176,13 +177,19 @@ def _forecast_daily_cost(data: list): "Content-Type": "application/json", } - response = requests.post( - url="https://trend-api-production.up.railway.app/forecast", - json=payload, - headers=headers, - ) - # check the status code - response.raise_for_status() + client = HTTPHandler() + + try: + response = client.post( + url="https://trend-api-production.up.railway.app/forecast", + json=payload, + headers=headers, + ) + except httpx.HTTPStatusError as e: + raise HTTPException( + status_code=500, + detail={"error": f"Error getting forecast: {e.response.text}"}, + ) json_response = response.json() forecast_data = json_response["forecast"] @@ -206,13 +213,3 @@ def _forecast_daily_cost(data: list): f"Predicted Spend for { today_month } 2024, ${total_predicted_spend}" ) return {"response": response_data, "predicted_spend": predicted_spend} - - # print(f"Date: {entry['date']}, Spend: {entry['spend']}, Response: {response.text}") - - -# _forecast_daily_cost( -# [ -# {"date": "2022-01-01", "spend": 100}, - -# ] -# ) diff --git a/litellm/integrations/datadog/datadog.py b/litellm/integrations/datadog/datadog.py index 42d9a38d6527..482c2bc10760 100644 --- a/litellm/integrations/datadog/datadog.py +++ b/litellm/integrations/datadog/datadog.py @@ -279,11 +279,11 @@ def create_datadog_logging_payload( verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload) dd_payload = DatadogPayload( - ddsource=os.getenv("DD_SOURCE", "litellm"), - ddtags="", - hostname="", + 
ddsource=self._get_datadog_source(), + ddtags=self._get_datadog_tags(), + hostname=self._get_datadog_hostname(), message=json_payload, - service="litellm-server", + service=self._get_datadog_service(), status=status, ) return dd_payload @@ -387,11 +387,11 @@ async def async_post_call_failure_hook( json_payload = json.dumps(_exception_payload) verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload) dd_payload = DatadogPayload( - ddsource=os.getenv("DD_SOURCE", "litellm"), - ddtags="", - hostname="", + ddsource=self._get_datadog_source(), + ddtags=self._get_datadog_tags(), + hostname=self._get_datadog_hostname(), message=json_payload, - service="litellm-server", + service=self._get_datadog_service(), status=DataDogStatus.ERROR, ) @@ -473,11 +473,31 @@ def _create_v0_logging_payload( verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload) dd_payload = DatadogPayload( - ddsource=os.getenv("DD_SOURCE", "litellm"), - ddtags="", - hostname="", + ddsource=self._get_datadog_source(), + ddtags=self._get_datadog_tags(), + hostname=self._get_datadog_hostname(), message=json_payload, - service="litellm-server", + service=self._get_datadog_service(), status=DataDogStatus.INFO, ) return dd_payload + + @staticmethod + def _get_datadog_tags(): + return f"env:{os.getenv('DD_ENV', 'unknown')},service:{os.getenv('DD_SERVICE', 'litellm')},version:{os.getenv('DD_VERSION', 'unknown')}" + + @staticmethod + def _get_datadog_source(): + return os.getenv("DD_SOURCE", "litellm") + + @staticmethod + def _get_datadog_service(): + return os.getenv("DD_SERVICE", "litellm-server") + + @staticmethod + def _get_datadog_hostname(): + return "" + + @staticmethod + def _get_datadog_env(): + return os.getenv("DD_ENV", "unknown") diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py index 6c08758ddd8a..23ee97a47ef8 100644 --- a/litellm/llms/bedrock/chat/converse_transformation.py +++ b/litellm/llms/bedrock/chat/converse_transformation.py @@ -458,7 +458,7 @@ def _supported_cross_region_inference_region(self) -> List[str]: """ Abbreviations of regions AWS Bedrock supports for cross region inference """ - return ["us", "eu"] + return ["us", "eu", "apac"] def _get_base_model(self, model: str) -> str: """ diff --git a/litellm/llms/custom_httpx/http_handler.py b/litellm/llms/custom_httpx/http_handler.py index f5c4f694dc50..f4d20f8fb97d 100644 --- a/litellm/llms/custom_httpx/http_handler.py +++ b/litellm/llms/custom_httpx/http_handler.py @@ -28,6 +28,62 @@ _DEFAULT_TIMEOUT = httpx.Timeout(timeout=5.0, connect=5.0) _DEFAULT_TTL_FOR_HTTPX_CLIENTS = 3600 # 1 hour, re-use the same httpx client for 1 hour +import re + + +def mask_sensitive_info(error_message): + # Find the start of the key parameter + if isinstance(error_message, str): + key_index = error_message.find("key=") + else: + return error_message + + # If key is found + if key_index != -1: + # Find the end of the key parameter (next & or end of string) + next_param = error_message.find("&", key_index) + + if next_param == -1: + # If no more parameters, mask until the end of the string + masked_message = error_message[: key_index + 4] + "[REDACTED_API_KEY]" + else: + # Replace the key with redacted value, keeping other parameters + masked_message = ( + error_message[: key_index + 4] + + "[REDACTED_API_KEY]" + + error_message[next_param:] + ) + + return masked_message + + return error_message + + +class MaskedHTTPStatusError(httpx.HTTPStatusError): + def __init__( + self, 
original_error, message: Optional[str] = None, text: Optional[str] = None + ): + # Create a new error with the masked URL + masked_url = mask_sensitive_info(str(original_error.request.url)) + # Create a new error that looks like the original, but with a masked URL + + super().__init__( + message=original_error.message, + request=httpx.Request( + method=original_error.request.method, + url=masked_url, + headers=original_error.request.headers, + content=original_error.request.content, + ), + response=httpx.Response( + status_code=original_error.response.status_code, + content=original_error.response.content, + headers=original_error.response.headers, + ), + ) + self.message = message + self.text = text + class AsyncHTTPHandler: def __init__( @@ -155,13 +211,16 @@ async def post( headers=headers, ) except httpx.HTTPStatusError as e: - setattr(e, "status_code", e.response.status_code) + if stream is True: setattr(e, "message", await e.response.aread()) setattr(e, "text", await e.response.aread()) else: - setattr(e, "message", e.response.text) - setattr(e, "text", e.response.text) + setattr(e, "message", mask_sensitive_info(e.response.text)) + setattr(e, "text", mask_sensitive_info(e.response.text)) + + setattr(e, "status_code", e.response.status_code) + raise e except Exception as e: raise e @@ -399,11 +458,17 @@ def post( llm_provider="litellm-httpx-handler", ) except httpx.HTTPStatusError as e: - setattr(e, "status_code", e.response.status_code) + if stream is True: - setattr(e, "message", e.response.read()) + setattr(e, "message", mask_sensitive_info(e.response.read())) + setattr(e, "text", mask_sensitive_info(e.response.read())) else: - setattr(e, "message", e.response.text) + error_text = mask_sensitive_info(e.response.text) + setattr(e, "message", error_text) + setattr(e, "text", error_text) + + setattr(e, "status_code", e.response.status_code) + raise e except Exception as e: raise e diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index cb79a81b7de7..bfd35ca47546 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -1159,15 +1159,44 @@ def convert_to_anthropic_tool_result( ] } """ - content_str: str = "" + anthropic_content: Union[ + str, + List[Union[AnthropicMessagesToolResultContent, AnthropicMessagesImageParam]], + ] = "" if isinstance(message["content"], str): - content_str = message["content"] + anthropic_content = message["content"] elif isinstance(message["content"], List): content_list = message["content"] + anthropic_content_list: List[ + Union[AnthropicMessagesToolResultContent, AnthropicMessagesImageParam] + ] = [] for content in content_list: if content["type"] == "text": - content_str += content["text"] + anthropic_content_list.append( + AnthropicMessagesToolResultContent( + type="text", + text=content["text"], + ) + ) + elif content["type"] == "image_url": + if isinstance(content["image_url"], str): + image_chunk = convert_to_anthropic_image_obj(content["image_url"]) + else: + image_chunk = convert_to_anthropic_image_obj( + content["image_url"]["url"] + ) + anthropic_content_list.append( + AnthropicMessagesImageParam( + type="image", + source=AnthropicContentParamSource( + type="base64", + media_type=image_chunk["media_type"], + data=image_chunk["data"], + ), + ) + ) + anthropic_content = anthropic_content_list anthropic_tool_result: Optional[AnthropicMessagesToolResultParam] = None ## PROMPT CACHING CHECK ## cache_control = message.get("cache_control", None) @@ -1178,14 
+1207,14 @@ def convert_to_anthropic_tool_result( # We can't determine from openai message format whether it's a successful or # error call result so default to the successful result template anthropic_tool_result = AnthropicMessagesToolResultParam( - type="tool_result", tool_use_id=tool_call_id, content=content_str + type="tool_result", tool_use_id=tool_call_id, content=anthropic_content ) if message["role"] == "function": function_message: ChatCompletionFunctionMessage = message tool_call_id = function_message.get("tool_call_id") or str(uuid.uuid4()) anthropic_tool_result = AnthropicMessagesToolResultParam( - type="tool_result", tool_use_id=tool_call_id, content=content_str + type="tool_result", tool_use_id=tool_call_id, content=anthropic_content ) if anthropic_tool_result is None: diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py index 4b5b7281bcc4..c9fe6e3f4d69 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py @@ -107,6 +107,10 @@ def _get_image_mime_type_from_url(url: str) -> Optional[str]: return "image/png" elif url.endswith(".webp"): return "image/webp" + elif url.endswith(".mp4"): + return "video/mp4" + elif url.endswith(".pdf"): + return "application/pdf" return None diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index b0d0e7d37748..ac22871bcc40 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -3383,6 +3383,8 @@ "supports_vision": true, "supports_response_schema": true, "supports_prompt_caching": true, + "tpm": 4000000, + "rpm": 2000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-flash-001": { @@ -3406,6 +3408,8 @@ "supports_vision": true, "supports_response_schema": true, "supports_prompt_caching": true, + "tpm": 4000000, + "rpm": 2000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-flash": { @@ -3428,6 +3432,8 @@ "supports_function_calling": true, "supports_vision": true, "supports_response_schema": true, + "tpm": 4000000, + "rpm": 2000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-flash-latest": { @@ -3450,6 +3456,32 @@ "supports_function_calling": true, "supports_vision": true, "supports_response_schema": true, + "tpm": 4000000, + "rpm": 2000, + "source": "https://ai.google.dev/pricing" + }, + "gemini/gemini-1.5-flash-8b": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "tpm": 4000000, + "rpm": 4000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-flash-8b-exp-0924": { @@ -3472,6 +3504,8 @@ "supports_function_calling": true, "supports_vision": true, "supports_response_schema": true, + "tpm": 4000000, + "rpm": 4000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-exp-1114": { @@ -3494,7 +3528,12 @@ 
"supports_function_calling": true, "supports_vision": true, "supports_response_schema": true, - "source": "https://ai.google.dev/pricing" + "tpm": 4000000, + "rpm": 1000, + "source": "https://ai.google.dev/pricing", + "metadata": { + "notes": "Rate limits not documented for gemini-exp-1114. Assuming same as gemini-1.5-pro." + } }, "gemini/gemini-1.5-flash-exp-0827": { "max_tokens": 8192, @@ -3516,6 +3555,8 @@ "supports_function_calling": true, "supports_vision": true, "supports_response_schema": true, + "tpm": 4000000, + "rpm": 2000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-flash-8b-exp-0827": { @@ -3537,6 +3578,9 @@ "supports_system_messages": true, "supports_function_calling": true, "supports_vision": true, + "supports_response_schema": true, + "tpm": 4000000, + "rpm": 4000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-pro": { @@ -3550,7 +3594,10 @@ "litellm_provider": "gemini", "mode": "chat", "supports_function_calling": true, - "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + "rpd": 30000, + "tpm": 120000, + "rpm": 360, + "source": "https://ai.google.dev/gemini-api/docs/models/gemini" }, "gemini/gemini-1.5-pro": { "max_tokens": 8192, @@ -3567,6 +3614,8 @@ "supports_vision": true, "supports_tool_choice": true, "supports_response_schema": true, + "tpm": 4000000, + "rpm": 1000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-pro-002": { @@ -3585,6 +3634,8 @@ "supports_tool_choice": true, "supports_response_schema": true, "supports_prompt_caching": true, + "tpm": 4000000, + "rpm": 1000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-pro-001": { @@ -3603,6 +3654,8 @@ "supports_tool_choice": true, "supports_response_schema": true, "supports_prompt_caching": true, + "tpm": 4000000, + "rpm": 1000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-pro-exp-0801": { @@ -3620,6 +3673,8 @@ "supports_vision": true, "supports_tool_choice": true, "supports_response_schema": true, + "tpm": 4000000, + "rpm": 1000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-pro-exp-0827": { @@ -3637,6 +3692,8 @@ "supports_vision": true, "supports_tool_choice": true, "supports_response_schema": true, + "tpm": 4000000, + "rpm": 1000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-1.5-pro-latest": { @@ -3654,6 +3711,8 @@ "supports_vision": true, "supports_tool_choice": true, "supports_response_schema": true, + "tpm": 4000000, + "rpm": 1000, "source": "https://ai.google.dev/pricing" }, "gemini/gemini-pro-vision": { @@ -3668,6 +3727,9 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, + "rpd": 30000, + "tpm": 120000, + "rpm": 360, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "gemini/gemini-gemma-2-27b-it": { diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html deleted file mode 100644 index 223f5d80e078..000000000000 --- a/litellm/proxy/_experimental/out/404.html +++ /dev/null @@ -1 +0,0 @@ -404: This page could not be found.LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/131-3d2257b0ff5aadb2.js b/litellm/proxy/_experimental/out/_next/static/chunks/131-3d2257b0ff5aadb2.js
deleted file mode 100644
index 51181e75a02a..000000000000
--- a/litellm/proxy/_experimental/out/_next/static/chunks/131-3d2257b0ff5aadb2.js
+++ /dev/null
@@ -1,8 +0,0 @@
[8 deleted lines of minified Next.js webpack bundle output omitted]
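The Datadog logger hunks near the top of this patch stop hard-coding ddsource/service and an empty ddtags string, and instead derive them from DD_SOURCE, DD_SERVICE, DD_ENV and DD_VERSION. A minimal standalone sketch of the resulting tag string follows; only the environment variable names and the fallback values come from the diff, the example values are invented.

    import os

    # Example values; only the variable names (DD_ENV, DD_SERVICE, DD_VERSION) are from the patch.
    os.environ["DD_ENV"] = "staging"
    os.environ["DD_SERVICE"] = "litellm-gateway"
    os.environ["DD_VERSION"] = "1.52.0"

    # Mirrors the _get_datadog_tags() helper added in the diff above.
    ddtags = (
        f"env:{os.getenv('DD_ENV', 'unknown')},"
        f"service:{os.getenv('DD_SERVICE', 'litellm')},"
        f"version:{os.getenv('DD_VERSION', 'unknown')}"
    )
    print(ddtags)  # env:staging,service:litellm-gateway,version:1.52.0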
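The changes to litellm/llms/custom_httpx/http_handler.py redact API keys that appear as a key= query parameter in provider error text and request URLs. Below is a hedged usage sketch, not part of the patch: the error string and key value are invented, and it assumes mask_sensitive_info is importable from the module the diff adds it to.

    from litellm.llms.custom_httpx.http_handler import mask_sensitive_info

    # Hypothetical provider error text containing a `key=` query parameter.
    error_text = (
        "Error calling https://generativelanguage.googleapis.com/v1beta/models/"
        "gemini-pro:generateContent?key=AIzaSy-FAKE-SECRET&alt=sse"
    )

    # The helper keeps everything up to `key=`, replaces the value with
    # [REDACTED_API_KEY], and preserves any parameters after the next `&`.
    print(mask_sensitive_info(error_text))
    # -> "Error calling https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?key=[REDACTED_API_KEY]&alt=sse"

MaskedHTTPStatusError applies the same masking to the request URL carried on the re-raised httpx exception, so the key never surfaces in stack traces returned to clients.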
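The convert_to_anthropic_tool_result change in litellm/llms/prompt_templates/factory.py now preserves list-style tool outputs, including image_url parts, instead of flattening everything into a single string. A hedged sketch of the OpenAI-format input this handles; the tool_call_id, text, and data URL are placeholders.

    # OpenAI-format tool message whose content mixes text and an image part.
    tool_message = {
        "role": "tool",
        "tool_call_id": "call_abc123",  # placeholder id
        "content": [
            {"type": "text", "text": "Here is the rendered chart."},
            {
                "type": "image_url",
                "image_url": {"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUg..."},
            },
        ],
    }

    # With this patch the result is roughly the following Anthropic shape:
    # {"type": "tool_result", "tool_use_id": "call_abc123",
    #  "content": [
    #      {"type": "text", "text": "Here is the rendered chart."},
    #      {"type": "image",
    #       "source": {"type": "base64", "media_type": "image/png", "data": "..."}}]}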
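The model-map hunks above add tpm/rpm (and, for the gemini-pro entries, rpd) hints next to the pricing fields in model_prices_and_context_window_backup.json. A hedged way to read them back at runtime, assuming the installed litellm build bundles this version of the JSON:

    import litellm

    # litellm.model_cost is the in-memory copy of the bundled model price/context map.
    info = litellm.model_cost.get("gemini/gemini-1.5-flash", {})
    print(info.get("tpm"), info.get("rpm"))  # expected with this patch: 4000000 2000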