diff --git a/enterprise/litellm_enterprise/proxy/hooks/managed_files.py b/enterprise/litellm_enterprise/proxy/hooks/managed_files.py index c55a4f038981..e3598d12c685 100644 --- a/enterprise/litellm_enterprise/proxy/hooks/managed_files.py +++ b/enterprise/litellm_enterprise/proxy/hooks/managed_files.py @@ -498,7 +498,6 @@ async def get_model_file_id_mapping( for file_id in file_ids: ## CHECK IF FILE ID IS MANAGED BY LITELM is_base64_unified_file_id = _is_base64_encoded_unified_file_id(file_id) - if is_base64_unified_file_id: litellm_managed_file_ids.append(file_id) @@ -509,6 +508,7 @@ async def get_model_file_id_mapping( unified_file_object = await self.get_unified_file_id( file_id, litellm_parent_otel_span ) + if unified_file_object: file_id_mapping[file_id] = unified_file_object.model_mappings @@ -784,18 +784,21 @@ async def afile_delete( llm_router: Router, **data: Dict, ) -> OpenAIFileObject: - file_id = convert_b64_uid_to_unified_uid(file_id) + + # file_id = convert_b64_uid_to_unified_uid(file_id) model_file_id_mapping = await self.get_model_file_id_mapping( [file_id], litellm_parent_otel_span ) + specific_model_file_id_mapping = model_file_id_mapping.get(file_id) if specific_model_file_id_mapping: - for model_id, file_id in specific_model_file_id_mapping.items(): - await llm_router.afile_delete(model=model_id, file_id=file_id, **data) # type: ignore + for model_id, model_file_id in specific_model_file_id_mapping.items(): + await llm_router.afile_delete(model=model_id, file_id=model_file_id, **data) # type: ignore stored_file_object = await self.delete_unified_file_id( file_id, litellm_parent_otel_span ) + if stored_file_object: return stored_file_object else: @@ -816,6 +819,7 @@ async def afile_content( model_file_id_mapping or await self.get_model_file_id_mapping([file_id], litellm_parent_otel_span) ) + specific_model_file_id_mapping = model_file_id_mapping.get(file_id) if specific_model_file_id_mapping: diff --git a/litellm/batches/main.py b/litellm/batches/main.py index 48521e5fba01..5279dd70bc42 100644 --- a/litellm/batches/main.py +++ b/litellm/batches/main.py @@ -17,6 +17,8 @@ from typing import Any, Coroutine, Dict, Literal, Optional, Union, cast import httpx +from openai.types.batch import BatchRequestCounts +from openai.types.batch import Metadata as BatchMetadata import litellm from litellm._logging import verbose_logger @@ -223,10 +225,12 @@ def create_batch( api_key=optional_params.api_key, logging_obj=litellm_logging_obj, _is_async=_is_async, - client=client - if client is not None - and isinstance(client, (HTTPHandler, AsyncHTTPHandler)) - else None, + client=( + client + if client is not None + and isinstance(client, (HTTPHandler, AsyncHTTPHandler)) + else None + ), timeout=timeout, model=model, ) @@ -609,10 +613,12 @@ def retrieve_batch( function_id="batch_retrieve", ), _is_async=_is_async, - client=client - if client is not None - and isinstance(client, (HTTPHandler, AsyncHTTPHandler)) - else None, + client=( + client + if client is not None + and isinstance(client, (HTTPHandler, AsyncHTTPHandler)) + else None + ), timeout=timeout, model=model, ) @@ -799,6 +805,7 @@ def list_batches( async def acancel_batch( batch_id: str, + model: Optional[str] = None, custom_llm_provider: Literal["openai", "azure"] = "openai", metadata: Optional[Dict[str, str]] = None, extra_headers: Optional[Dict[str, str]] = None, @@ -813,11 +820,13 @@ async def acancel_batch( try: loop = asyncio.get_event_loop() kwargs["acancel_batch"] = True + model = kwargs.pop("model", None) # Use a partial function to pass your keyword arguments func = partial( cancel_batch, batch_id, + model, custom_llm_provider, metadata, extra_headers, @@ -840,7 +849,8 @@ async def acancel_batch( def cancel_batch( batch_id: str, - custom_llm_provider: Literal["openai", "azure"] = "openai", + model: Optional[str] = None, + custom_llm_provider: Union[Literal["openai", "azure"], str] = "openai", metadata: Optional[Dict[str, str]] = None, extra_headers: Optional[Dict[str, str]] = None, extra_body: Optional[Dict[str, str]] = None, @@ -852,6 +862,17 @@ def cancel_batch( LiteLLM Equivalent of POST https://api.openai.com/v1/batches/{batch_id}/cancel """ try: + + try: + if model is not None: + _, custom_llm_provider, _, _ = get_llm_provider( + model=model, + custom_llm_provider=custom_llm_provider, + ) + except Exception as e: + verbose_logger.exception( + f"litellm.batches.main.py::cancel_batch() - Error inferring custom_llm_provider - {str(e)}" + ) optional_params = GenericLiteLLMParams(**kwargs) litellm_params = get_litellm_params( custom_llm_provider=custom_llm_provider, @@ -1005,21 +1026,28 @@ async def _async_get_status(): created_at=status_response["submitTime"], in_progress_at=status_response["lastModifiedTime"], completed_at=status_response.get("endTime"), - failed_at=status_response.get("endTime") - if status_response["status"] == "failed" - else None, - request_counts={ - "total": 1, - "completed": 1 if status_response["status"] == "completed" else 0, - "failed": 1 if status_response["status"] == "failed" else 0, - }, - metadata={ - "output_file_id": status_response["outputDataConfig"][ - "s3OutputDataConfig" - ]["s3Uri"], - "failure_message": status_response.get("failureMessage"), - "model_arn": status_response["modelArn"], - }, + failed_at=( + status_response.get("endTime") + if status_response["status"] == "failed" + else None + ), + request_counts=BatchRequestCounts( + total=1, + completed=1 if status_response["status"] == "completed" else 0, + failed=1 if status_response["status"] == "failed" else 0, + ), + metadata=dict( + **{ + "output_file_id": status_response["outputDataConfig"][ + "s3OutputDataConfig" + ]["s3Uri"], + "failure_message": status_response.get("failureMessage") or "", + "model_arn": status_response["modelArn"], + } + ), + completion_window="24h", + endpoint="/v1/embeddings", + input_file_id="", ) return result diff --git a/litellm/files/main.py b/litellm/files/main.py index 9c85fa105653..535772fa42c0 100644 --- a/litellm/files/main.py +++ b/litellm/files/main.py @@ -95,7 +95,9 @@ async def acreate_file( def create_file( file: FileTypes, purpose: Literal["assistants", "batch", "fine-tune"], - custom_llm_provider: Optional[Literal["openai", "azure", "vertex_ai", "bedrock"]] = None, + custom_llm_provider: Optional[ + Literal["openai", "azure", "vertex_ai", "bedrock"] + ] = None, extra_headers: Optional[Dict[str, str]] = None, extra_body: Optional[Dict[str, str]] = None, **kwargs, @@ -155,10 +157,12 @@ def create_file( api_key=optional_params.api_key, logging_obj=logging_obj, _is_async=_is_async, - client=client - if client is not None - and isinstance(client, (HTTPHandler, AsyncHTTPHandler)) - else None, + client=( + client + if client is not None + and isinstance(client, (HTTPHandler, AsyncHTTPHandler)) + else None + ), timeout=timeout, ) elif custom_llm_provider == "openai": @@ -441,12 +445,14 @@ async def afile_delete( """ try: loop = asyncio.get_event_loop() + model = kwargs.pop("model", None) kwargs["is_async"] = True # Use a partial function to pass your keyword arguments func = partial( file_delete, file_id, + model, custom_llm_provider, extra_headers, extra_body, @@ -470,7 +476,8 @@ async def afile_delete( @client def file_delete( file_id: str, - custom_llm_provider: Literal["openai", "azure"] = "openai", + model: Optional[str] = None, + custom_llm_provider: Union[Literal["openai", "azure"], str] = "openai", extra_headers: Optional[Dict[str, str]] = None, extra_body: Optional[Dict[str, str]] = None, **kwargs, @@ -481,6 +488,13 @@ def file_delete( LiteLLM Equivalent of DELETE https://api.openai.com/v1/files """ try: + try: + if model is not None: + _, custom_llm_provider, _, _ = get_llm_provider( + model, custom_llm_provider + ) + except Exception: + pass optional_params = GenericLiteLLMParams(**kwargs) litellm_params_dict = get_litellm_params(**kwargs) ### TIMEOUT LOGIC ### @@ -566,7 +580,7 @@ def file_delete( ) else: raise litellm.exceptions.BadRequestError( - message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format( + message="LiteLLM doesn't support {} for 'delete_batch'. Only 'openai' is supported.".format( custom_llm_provider ), model="n/a", diff --git a/litellm/proxy/_experimental/out/api-reference.html b/litellm/proxy/_experimental/out/api-reference/index.html similarity index 100% rename from litellm/proxy/_experimental/out/api-reference.html rename to litellm/proxy/_experimental/out/api-reference/index.html diff --git a/litellm/proxy/_experimental/out/guardrails.html b/litellm/proxy/_experimental/out/guardrails.html deleted file mode 100644 index 3929c49a29bc..000000000000 --- a/litellm/proxy/_experimental/out/guardrails.html +++ /dev/null @@ -1 +0,0 @@ -LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/logs.html b/litellm/proxy/_experimental/out/logs/index.html similarity index 100% rename from litellm/proxy/_experimental/out/logs.html rename to litellm/proxy/_experimental/out/logs/index.html diff --git a/litellm/proxy/_experimental/out/model-hub.html b/litellm/proxy/_experimental/out/model-hub/index.html similarity index 100% rename from litellm/proxy/_experimental/out/model-hub.html rename to litellm/proxy/_experimental/out/model-hub/index.html diff --git a/litellm/proxy/_experimental/out/model_hub_table.html b/litellm/proxy/_experimental/out/model_hub_table/index.html similarity index 100% rename from litellm/proxy/_experimental/out/model_hub_table.html rename to litellm/proxy/_experimental/out/model_hub_table/index.html diff --git a/litellm/proxy/_experimental/out/models-and-endpoints.html b/litellm/proxy/_experimental/out/models-and-endpoints/index.html similarity index 100% rename from litellm/proxy/_experimental/out/models-and-endpoints.html rename to litellm/proxy/_experimental/out/models-and-endpoints/index.html diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html deleted file mode 100644 index e38fb5467311..000000000000 --- a/litellm/proxy/_experimental/out/onboarding.html +++ /dev/null @@ -1 +0,0 @@ -LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/organizations.html b/litellm/proxy/_experimental/out/organizations/index.html similarity index 100% rename from litellm/proxy/_experimental/out/organizations.html rename to litellm/proxy/_experimental/out/organizations/index.html diff --git a/litellm/proxy/_experimental/out/teams.html b/litellm/proxy/_experimental/out/teams/index.html similarity index 100% rename from litellm/proxy/_experimental/out/teams.html rename to litellm/proxy/_experimental/out/teams/index.html diff --git a/litellm/proxy/_experimental/out/test-key.html b/litellm/proxy/_experimental/out/test-key/index.html similarity index 100% rename from litellm/proxy/_experimental/out/test-key.html rename to litellm/proxy/_experimental/out/test-key/index.html diff --git a/litellm/proxy/_experimental/out/usage.html b/litellm/proxy/_experimental/out/usage/index.html similarity index 100% rename from litellm/proxy/_experimental/out/usage.html rename to litellm/proxy/_experimental/out/usage/index.html diff --git a/litellm/proxy/_experimental/out/users.html b/litellm/proxy/_experimental/out/users/index.html similarity index 100% rename from litellm/proxy/_experimental/out/users.html rename to litellm/proxy/_experimental/out/users/index.html diff --git a/litellm/proxy/_experimental/out/virtual-keys.html b/litellm/proxy/_experimental/out/virtual-keys/index.html similarity index 100% rename from litellm/proxy/_experimental/out/virtual-keys.html rename to litellm/proxy/_experimental/out/virtual-keys/index.html diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 9ef9812dd490..ff3c3e219b87 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,21 +1,8 @@ model_list: - model_name: gpt-5-mini litellm_params: - model: bedrock/global.anthropic.claude-sonnet-4-5-20250929-v1:0 + model: gpt-5-mini - model_name: embedding-model litellm_params: model: openai/text-embedding-3-large -vector_store_registry: - - vector_store_name: "vertex-ai-litellm-website-knowledgebase" - litellm_params: - vector_store_id: "litellm-docs_1761094140318" - custom_llm_provider: "vertex_ai/search_api" - vertex_project: "test-vector-store-db" - vertex_location: "global" - - vector_store_name: "milvus-litellm-website-knowledgebase" - litellm_params: - vector_store_id: "can-be-anything" - custom_llm_provider: "milvus" - api_base: os.environ/MILVUS_API_BASE - api_key: os.environ/MILVUS_API_KEY \ No newline at end of file diff --git a/litellm/proxy/batches_endpoints/endpoints.py b/litellm/proxy/batches_endpoints/endpoints.py index 2fc0298d1c83..ac7082edb690 100644 --- a/litellm/proxy/batches_endpoints/endpoints.py +++ b/litellm/proxy/batches_endpoints/endpoints.py @@ -22,6 +22,9 @@ ) from litellm.proxy.openai_files_endpoints.common_utils import ( _is_base64_encoded_unified_file_id, + convert_b64_uid_to_unified_uid, + get_batch_id_from_unified_batch_id, + get_model_id_from_unified_batch_id, get_models_from_unified_file_id, ) from litellm.proxy.utils import handle_exception_on_proxy, is_known_model @@ -506,6 +509,7 @@ async def cancel_batch( from litellm.proxy.proxy_server import ( add_litellm_data_to_request, general_settings, + llm_router, proxy_config, proxy_logging_obj, version, @@ -517,6 +521,7 @@ async def cancel_batch( verbose_proxy_logger.debug( "Request received by LiteLLM:\n{}".format(json.dumps(data, indent=4)), ) + unified_batch_id = _is_base64_encoded_unified_file_id(batch_id) # Include original request and headers in the data data = await add_litellm_data_to_request( @@ -528,14 +533,36 @@ async def cancel_batch( proxy_config=proxy_config, ) - custom_llm_provider = ( - provider or data.pop("custom_llm_provider", None) or "openai" - ) - _cancel_batch_data = CancelBatchRequest(batch_id=batch_id, **data) - response = await litellm.acancel_batch( - custom_llm_provider=custom_llm_provider, # type: ignore - **_cancel_batch_data, - ) + if unified_batch_id: + if llm_router is None: + raise HTTPException( + status_code=500, + detail={ + "error": "LLM Router not initialized. Ensure models added to proxy." + }, + ) + + model = ( + get_model_id_from_unified_batch_id(unified_batch_id) + if unified_batch_id + else None + ) + + model_batch_id = get_batch_id_from_unified_batch_id(unified_batch_id) + + data["batch_id"] = model_batch_id + + response = await llm_router.acancel_batch(model=model, **data) # type: ignore + else: + + custom_llm_provider = ( + provider or data.pop("custom_llm_provider", None) or "openai" + ) + _cancel_batch_data = CancelBatchRequest(batch_id=batch_id, **data) + response = await litellm.acancel_batch( + custom_llm_provider=custom_llm_provider, # type: ignore + **_cancel_batch_data, + ) ### ALERTING ### asyncio.create_task( diff --git a/litellm/proxy/openai_files_endpoints/files_endpoints.py b/litellm/proxy/openai_files_endpoints/files_endpoints.py index 043b0c886e98..b0be54761364 100644 --- a/litellm/proxy/openai_files_endpoints/files_endpoints.py +++ b/litellm/proxy/openai_files_endpoints/files_endpoints.py @@ -31,8 +31,8 @@ from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing from litellm.proxy.common_utils.openai_endpoint_utils import ( get_custom_llm_provider_from_request_body, - get_custom_llm_provider_from_request_query, get_custom_llm_provider_from_request_headers, + get_custom_llm_provider_from_request_query, ) from litellm.proxy.utils import ProxyLogging, is_known_model from litellm.router import Router @@ -788,6 +788,7 @@ async def delete_file( param="None", code=500, ) + response = await managed_files_obj.afile_delete( file_id=file_id, litellm_parent_otel_span=user_api_key_dict.parent_otel_span, @@ -828,12 +829,11 @@ async def delete_file( await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) - verbose_proxy_logger.error( - "litellm.proxy.proxy_server.retrieve_file(): Exception occured - {}".format( + verbose_proxy_logger.exception( + "litellm.proxy.proxy_server.delete_file(): Exception occured - {}".format( str(e) ) ) - verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): raise ProxyException( message=getattr(e, "message", str(e.detail)), diff --git a/litellm/router.py b/litellm/router.py index 1489de864886..21eb894c0ccd 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -153,11 +153,7 @@ ) from litellm.types.utils import ModelInfo from litellm.types.utils import ModelInfo as ModelMapInfo -from litellm.types.utils import ( - ModelResponseStream, - StandardLoggingPayload, - Usage, -) +from litellm.types.utils import ModelResponseStream, StandardLoggingPayload, Usage from litellm.utils import ( CustomStreamWrapper, EmbeddingResponse, @@ -357,6 +353,7 @@ def __init__( # noqa: PLR0915 self.enable_pre_call_checks = enable_pre_call_checks self.enable_tag_filtering = enable_tag_filtering from litellm._service_logger import ServiceLogging + self.service_logger_obj: ServiceLogging = ServiceLogging() litellm.suppress_debug_info = True # prevents 'Give Feedback/Get help' message from being emitted on Router - Relevant Issue: https://github.com/BerriAI/litellm/issues/5942 if self.set_verbose is True: @@ -375,9 +372,9 @@ def __init__( # noqa: PLR0915 ) # names of models under litellm_params. ex. azure/chatgpt-v-2 self.deployment_latency_map = {} ### CACHING ### - cache_type: Literal[ - "local", "redis", "redis-semantic", "s3", "disk" - ] = "local" # default to an in-memory cache + cache_type: Literal["local", "redis", "redis-semantic", "s3", "disk"] = ( + "local" # default to an in-memory cache + ) redis_cache = None cache_config: Dict[str, Any] = {} @@ -419,9 +416,9 @@ def __init__( # noqa: PLR0915 self.default_max_parallel_requests = default_max_parallel_requests self.provider_default_deployment_ids: List[str] = [] self.pattern_router = PatternMatchRouter() - self.team_pattern_routers: Dict[ - str, PatternMatchRouter - ] = {} # {"TEAM_ID": PatternMatchRouter} + self.team_pattern_routers: Dict[str, PatternMatchRouter] = ( + {} + ) # {"TEAM_ID": PatternMatchRouter} self.auto_routers: Dict[str, "AutoRouter"] = {} # Initialize model_group_alias early since it's used in set_model_list @@ -602,9 +599,9 @@ def __init__( # noqa: PLR0915 ) ) - self.model_group_retry_policy: Optional[ - Dict[str, RetryPolicy] - ] = model_group_retry_policy + self.model_group_retry_policy: Optional[Dict[str, RetryPolicy]] = ( + model_group_retry_policy + ) self.allowed_fails_policy: Optional[AllowedFailsPolicy] = None if allowed_fails_policy is not None: @@ -708,9 +705,7 @@ def routing_strategy_init( routing_strategy == RoutingStrategy.LEAST_BUSY.value or routing_strategy == RoutingStrategy.LEAST_BUSY ): - self.leastbusy_logger = LeastBusyLoggingHandler( - router_cache=self.cache - ) + self.leastbusy_logger = LeastBusyLoggingHandler(router_cache=self.cache) ## add callback if isinstance(litellm.input_callback, list): litellm.input_callback.append(self.leastbusy_logger) # type: ignore @@ -774,34 +769,84 @@ def initialize_assistants_endpoint(self): def _initialize_core_endpoints(self): """Helper to initialize core router endpoints.""" - self.amoderation = self.factory_function(litellm.amoderation, call_type="moderation") - self.aanthropic_messages = self.factory_function(litellm.anthropic_messages, call_type="anthropic_messages") - self.agenerate_content = self.factory_function(litellm.agenerate_content, call_type="agenerate_content") - self.aadapter_generate_content = self.factory_function(litellm.aadapter_generate_content, call_type="aadapter_generate_content") - self.aresponses = self.factory_function(litellm.aresponses, call_type="aresponses") - self.afile_delete = self.factory_function(litellm.afile_delete, call_type="afile_delete") - self.afile_content = self.factory_function(litellm.afile_content, call_type="afile_content") + self.amoderation = self.factory_function( + litellm.amoderation, call_type="moderation" + ) + self.aanthropic_messages = self.factory_function( + litellm.anthropic_messages, call_type="anthropic_messages" + ) + self.agenerate_content = self.factory_function( + litellm.agenerate_content, call_type="agenerate_content" + ) + self.aadapter_generate_content = self.factory_function( + litellm.aadapter_generate_content, call_type="aadapter_generate_content" + ) + self.aresponses = self.factory_function( + litellm.aresponses, call_type="aresponses" + ) + self.afile_delete = self.factory_function( + litellm.afile_delete, call_type="afile_delete" + ) + self.afile_content = self.factory_function( + litellm.afile_content, call_type="afile_content" + ) self.responses = self.factory_function(litellm.responses, call_type="responses") - self.aget_responses = self.factory_function(litellm.aget_responses, call_type="aget_responses") - self.acancel_responses = self.factory_function(litellm.acancel_responses, call_type="acancel_responses") - self.adelete_responses = self.factory_function(litellm.adelete_responses, call_type="adelete_responses") - self.alist_input_items = self.factory_function(litellm.alist_input_items, call_type="alist_input_items") - self._arealtime = self.factory_function(litellm._arealtime, call_type="_arealtime") - self.acreate_fine_tuning_job = self.factory_function(litellm.acreate_fine_tuning_job, call_type="acreate_fine_tuning_job") - self.acancel_fine_tuning_job = self.factory_function(litellm.acancel_fine_tuning_job, call_type="acancel_fine_tuning_job") - self.alist_fine_tuning_jobs = self.factory_function(litellm.alist_fine_tuning_jobs, call_type="alist_fine_tuning_jobs") - self.aretrieve_fine_tuning_job = self.factory_function(litellm.aretrieve_fine_tuning_job, call_type="aretrieve_fine_tuning_job") - self.afile_list = self.factory_function(litellm.afile_list, call_type="alist_files") - self.aimage_edit = self.factory_function(litellm.aimage_edit, call_type="aimage_edit") - self.allm_passthrough_route = self.factory_function(litellm.allm_passthrough_route, call_type="allm_passthrough_route") + self.aget_responses = self.factory_function( + litellm.aget_responses, call_type="aget_responses" + ) + self.acancel_responses = self.factory_function( + litellm.acancel_responses, call_type="acancel_responses" + ) + self.adelete_responses = self.factory_function( + litellm.adelete_responses, call_type="adelete_responses" + ) + self.alist_input_items = self.factory_function( + litellm.alist_input_items, call_type="alist_input_items" + ) + self._arealtime = self.factory_function( + litellm._arealtime, call_type="_arealtime" + ) + self.acreate_fine_tuning_job = self.factory_function( + litellm.acreate_fine_tuning_job, call_type="acreate_fine_tuning_job" + ) + self.acancel_fine_tuning_job = self.factory_function( + litellm.acancel_fine_tuning_job, call_type="acancel_fine_tuning_job" + ) + self.alist_fine_tuning_jobs = self.factory_function( + litellm.alist_fine_tuning_jobs, call_type="alist_fine_tuning_jobs" + ) + self.aretrieve_fine_tuning_job = self.factory_function( + litellm.aretrieve_fine_tuning_job, call_type="aretrieve_fine_tuning_job" + ) + self.afile_list = self.factory_function( + litellm.afile_list, call_type="alist_files" + ) + self.aimage_edit = self.factory_function( + litellm.aimage_edit, call_type="aimage_edit" + ) + self.allm_passthrough_route = self.factory_function( + litellm.allm_passthrough_route, call_type="allm_passthrough_route" + ) + self.acancel_batch = self.factory_function( + litellm.acancel_batch, call_type="acancel_batch" + ) def _initialize_specialized_endpoints(self): """Helper to initialize specialized router endpoints (vector store, OCR, search, video, container).""" from litellm.vector_stores.main import acreate, asearch, create, search - self.avector_store_search = self.factory_function(asearch, call_type="avector_store_search") - self.avector_store_create = self.factory_function(acreate, call_type="avector_store_create") - self.vector_store_search = self.factory_function(search, call_type="vector_store_search") - self.vector_store_create = self.factory_function(create, call_type="vector_store_create") + + self.avector_store_search = self.factory_function( + asearch, call_type="avector_store_search" + ) + self.avector_store_create = self.factory_function( + acreate, call_type="avector_store_create" + ) + self.vector_store_search = self.factory_function( + search, call_type="vector_store_search" + ) + self.vector_store_create = self.factory_function( + create, call_type="vector_store_create" + ) from litellm.google_genai import ( agenerate_content, @@ -809,16 +854,27 @@ def _initialize_specialized_endpoints(self): generate_content, generate_content_stream, ) - self.agenerate_content = self.factory_function(agenerate_content, call_type="agenerate_content") - self.generate_content = self.factory_function(generate_content, call_type="generate_content") - self.agenerate_content_stream = self.factory_function(agenerate_content_stream, call_type="agenerate_content_stream") - self.generate_content_stream = self.factory_function(generate_content_stream, call_type="generate_content_stream") + + self.agenerate_content = self.factory_function( + agenerate_content, call_type="agenerate_content" + ) + self.generate_content = self.factory_function( + generate_content, call_type="generate_content" + ) + self.agenerate_content_stream = self.factory_function( + agenerate_content_stream, call_type="agenerate_content_stream" + ) + self.generate_content_stream = self.factory_function( + generate_content_stream, call_type="generate_content_stream" + ) from litellm.ocr import aocr, ocr + self.aocr = self.factory_function(aocr, call_type="aocr") self.ocr = self.factory_function(ocr, call_type="ocr") from litellm.search import asearch, search + self.asearch = self.factory_function(asearch, call_type="asearch") self.search = self.factory_function(search, call_type="search") @@ -834,15 +890,30 @@ def _initialize_specialized_endpoints(self): video_remix, video_status, ) - self.avideo_generation = self.factory_function(avideo_generation, call_type="avideo_generation") - self.video_generation = self.factory_function(video_generation, call_type="video_generation") + + self.avideo_generation = self.factory_function( + avideo_generation, call_type="avideo_generation" + ) + self.video_generation = self.factory_function( + video_generation, call_type="video_generation" + ) self.avideo_list = self.factory_function(avideo_list, call_type="avideo_list") self.video_list = self.factory_function(video_list, call_type="video_list") - self.avideo_status = self.factory_function(avideo_status, call_type="avideo_status") - self.video_status = self.factory_function(video_status, call_type="video_status") - self.avideo_content = self.factory_function(avideo_content, call_type="avideo_content") - self.video_content = self.factory_function(video_content, call_type="video_content") - self.avideo_remix = self.factory_function(avideo_remix, call_type="avideo_remix") + self.avideo_status = self.factory_function( + avideo_status, call_type="avideo_status" + ) + self.video_status = self.factory_function( + video_status, call_type="video_status" + ) + self.avideo_content = self.factory_function( + avideo_content, call_type="avideo_content" + ) + self.video_content = self.factory_function( + video_content, call_type="video_content" + ) + self.avideo_remix = self.factory_function( + avideo_remix, call_type="avideo_remix" + ) self.video_remix = self.factory_function(video_remix, call_type="video_remix") from litellm.containers import ( @@ -855,14 +926,31 @@ def _initialize_specialized_endpoints(self): list_containers, retrieve_container, ) - self.acreate_container = self.factory_function(acreate_container, call_type="acreate_container") - self.create_container = self.factory_function(create_container, call_type="create_container") - self.alist_containers = self.factory_function(alist_containers, call_type="alist_containers") - self.list_containers = self.factory_function(list_containers, call_type="list_containers") - self.aretrieve_container = self.factory_function(aretrieve_container, call_type="aretrieve_container") - self.retrieve_container = self.factory_function(retrieve_container, call_type="retrieve_container") - self.adelete_container = self.factory_function(adelete_container, call_type="adelete_container") - self.delete_container = self.factory_function(delete_container, call_type="delete_container") + + self.acreate_container = self.factory_function( + acreate_container, call_type="acreate_container" + ) + self.create_container = self.factory_function( + create_container, call_type="create_container" + ) + self.alist_containers = self.factory_function( + alist_containers, call_type="alist_containers" + ) + self.list_containers = self.factory_function( + list_containers, call_type="list_containers" + ) + self.aretrieve_container = self.factory_function( + aretrieve_container, call_type="aretrieve_container" + ) + self.retrieve_container = self.factory_function( + retrieve_container, call_type="retrieve_container" + ) + self.adelete_container = self.factory_function( + adelete_container, call_type="adelete_container" + ) + self.delete_container = self.factory_function( + delete_container, call_type="delete_container" + ) def initialize_router_endpoints(self): self._initialize_core_endpoints() @@ -1226,7 +1314,10 @@ async def stream_with_fallbacks(): async def _acompletion( self, model: str, messages: List[Dict[str, str]], **kwargs - ) -> Union[ModelResponse, CustomStreamWrapper,]: + ) -> Union[ + ModelResponse, + CustomStreamWrapper, + ]: """ - Get an available deployment - call it with a semaphore over the call @@ -2694,21 +2785,19 @@ async def _aadapter_completion(self, adapter_id: str, model: str, **kwargs): self.fail_calls[model] += 1 raise e - async def _asearch_with_fallbacks( - self, original_function: Callable, **kwargs - ): + async def _asearch_with_fallbacks(self, original_function: Callable, **kwargs): """ Helper function to make a search API call through the router with load balancing and fallbacks. Reuses the router's retry/fallback infrastructure. """ from litellm.router_utils.search_api_router import SearchAPIRouter - + return await SearchAPIRouter.async_search_with_fallbacks( router_instance=self, original_function=original_function, **kwargs, ) - + async def _asearch_with_fallbacks_helper( self, model: str, original_generic_function: Callable, **kwargs ): @@ -2717,7 +2806,7 @@ async def _asearch_with_fallbacks_helper( Called by async_function_with_fallbacks for each retry attempt. """ from litellm.router_utils.search_api_router import SearchAPIRouter - + return await SearchAPIRouter.async_search_with_fallbacks_helper( router_instance=self, model=model, @@ -2755,11 +2844,9 @@ async def _ageneric_api_call_with_fallbacks( ) ) raise e - + def _add_deployment_model_to_endpoint_for_llm_passthrough_route( - self, kwargs: Dict[str, Any], - model: str, - model_name: str + self, kwargs: Dict[str, Any], model: str, model_name: str ) -> Dict[str, Any]: """ Add the deployment model to the endpoint for LLM passthrough route. @@ -2771,7 +2858,7 @@ def _add_deployment_model_to_endpoint_for_llm_passthrough_route( # For provider-specific endpoints, strip the provider prefix from model_name # e.g., "bedrock/us.anthropic.claude-3-5-sonnet-20240620-v1:0" -> "us.anthropic.claude-3-5-sonnet-20240620-v1:0" from litellm import get_llm_provider - + try: # get_llm_provider returns (model_without_prefix, provider, api_key, api_base) stripped_model_name, _, _, _ = get_llm_provider( @@ -2783,8 +2870,10 @@ def _add_deployment_model_to_endpoint_for_llm_passthrough_route( except Exception: # If get_llm_provider fails, fall back to using model_name as-is replacement_model_name = model_name - - kwargs["endpoint"] = kwargs["endpoint"].replace(model, replacement_model_name) + + kwargs["endpoint"] = kwargs["endpoint"].replace( + model, replacement_model_name + ) return kwargs async def _ageneric_api_call_with_fallbacks_helper( @@ -2818,7 +2907,9 @@ async def _ageneric_api_call_with_fallbacks_helper( model_name = data["model"] self.total_calls[model_name] += 1 - self._add_deployment_model_to_endpoint_for_llm_passthrough_route(kwargs=kwargs, model=model, model_name=model_name) + self._add_deployment_model_to_endpoint_for_llm_passthrough_route( + kwargs=kwargs, model=model, model_name=model_name + ) ### get custom response = original_generic_function( **{ @@ -3247,9 +3338,9 @@ async def create_file_for_deployment(deployment: dict) -> OpenAIFileObject: healthy_deployments=healthy_deployments, responses=responses ) returned_response = cast(OpenAIFileObject, responses[0]) - returned_response._hidden_params[ - "model_file_id_mapping" - ] = model_file_id_mapping + returned_response._hidden_params["model_file_id_mapping"] = ( + model_file_id_mapping + ) return returned_response except Exception as e: verbose_router_logger.exception( @@ -3582,6 +3673,7 @@ def factory_function( "afile_delete", "afile_content", "_arealtime", + "acancel_batch", "acreate_fine_tuning_job", "acancel_fine_tuning_job", "alist_fine_tuning_jobs", @@ -3620,7 +3712,7 @@ def factory_function( "aretrieve_container", "retrieve_container", "adelete_container", - "delete_container" + "delete_container", ] = "assistants", ): """ @@ -3706,6 +3798,7 @@ async def async_wrapper( "alist_containers", "aretrieve_container", "adelete_container", + "acancel_batch", ): return await self._ageneric_api_call_with_fallbacks( original_function=original_function, @@ -3862,11 +3955,11 @@ async def async_function_with_fallbacks_common_utils( # noqa: PLR0915 if isinstance(e, litellm.ContextWindowExceededError): if context_window_fallbacks is not None: - context_window_fallback_model_group: Optional[ - List[str] - ] = self._get_fallback_model_group_from_fallbacks( - fallbacks=context_window_fallbacks, - model_group=model_group, + context_window_fallback_model_group: Optional[List[str]] = ( + self._get_fallback_model_group_from_fallbacks( + fallbacks=context_window_fallbacks, + model_group=model_group, + ) ) if context_window_fallback_model_group is None: raise original_exception @@ -3898,11 +3991,11 @@ async def async_function_with_fallbacks_common_utils( # noqa: PLR0915 e.message += "\n{}".format(error_message) elif isinstance(e, litellm.ContentPolicyViolationError): if content_policy_fallbacks is not None: - content_policy_fallback_model_group: Optional[ - List[str] - ] = self._get_fallback_model_group_from_fallbacks( - fallbacks=content_policy_fallbacks, - model_group=model_group, + content_policy_fallback_model_group: Optional[List[str]] = ( + self._get_fallback_model_group_from_fallbacks( + fallbacks=content_policy_fallbacks, + model_group=model_group, + ) ) if content_policy_fallback_model_group is None: raise original_exception @@ -4620,7 +4713,7 @@ def deployment_callback_on_failure( try: exception = kwargs.get("exception", None) exception_status = getattr(exception, "status_code", "") - + # Cache litellm_params to avoid repeated dict lookups litellm_params = kwargs.get("litellm_params", {}) _model_info = litellm_params.get("model_info", {}) @@ -5144,26 +5237,26 @@ def init_auto_router_deployment(self, deployment: Deployment): """ from litellm.router_strategy.auto_router.auto_router import AutoRouter - auto_router_config_path: Optional[ - str - ] = deployment.litellm_params.auto_router_config_path + auto_router_config_path: Optional[str] = ( + deployment.litellm_params.auto_router_config_path + ) auto_router_config: Optional[str] = deployment.litellm_params.auto_router_config if auto_router_config_path is None and auto_router_config is None: raise ValueError( "auto_router_config_path or auto_router_config is required for auto-router deployments. Please set it in the litellm_params" ) - default_model: Optional[ - str - ] = deployment.litellm_params.auto_router_default_model + default_model: Optional[str] = ( + deployment.litellm_params.auto_router_default_model + ) if default_model is None: raise ValueError( "auto_router_default_model is required for auto-router deployments. Please set it in the litellm_params" ) - embedding_model: Optional[ - str - ] = deployment.litellm_params.auto_router_embedding_model + embedding_model: Optional[str] = ( + deployment.litellm_params.auto_router_embedding_model + ) if embedding_model is None: raise ValueError( "auto_router_embedding_model is required for auto-router deployments. Please set it in the litellm_params" @@ -5269,7 +5362,7 @@ def set_model_list(self, model_list: list): f"\nInitialized Model List {self.get_model_names()}" ) self.model_names = {m["model_name"] for m in model_list} - + # Note: model_name_to_deployment_indices is already built incrementally # by _create_deployment -> _add_model_to_list_and_index_map @@ -5494,13 +5587,13 @@ def _update_deployment_indices_after_removal( # Remove the deleted model from index if model_id in self.model_id_to_deployment_index_map: del self.model_id_to_deployment_index_map[model_id] - + # Update model_name_to_deployment_indices for model_name, indices in list(self.model_name_to_deployment_indices.items()): # Remove the deleted index if removal_idx in indices: indices.remove(removal_idx) - + # Decrement all indices greater than removal_idx updated_indices = [] for idx in indices: @@ -5508,7 +5601,7 @@ def _update_deployment_indices_after_removal( updated_indices.append(idx - 1) else: updated_indices.append(idx) - + # Update or remove the entry if len(updated_indices) > 0: self.model_name_to_deployment_indices[model_name] = updated_indices @@ -5527,13 +5620,13 @@ def _add_model_to_list_and_index_map( """ idx = len(self.model_list) self.model_list.append(model) - + # Update model_id index for O(1) lookup if model_id is not None: self.model_id_to_deployment_index_map[model_id] = idx elif model.get("model_info", {}).get("id") is not None: self.model_id_to_deployment_index_map[model["model_info"]["id"]] = idx - + # Update model_name index for O(1) lookup model_name = model.get("model_name") if model_name: @@ -5653,7 +5746,7 @@ def get_deployment_by_model_group_name( Returns -> Deployment or None Raise Exception -> if model found in invalid format - + Optimized with O(1) index lookup instead of O(n) linear scan. """ # O(1) lookup in model_name index @@ -5771,7 +5864,7 @@ def get_model_info(self, id: str) -> Optional[dict]: Returns - dict: the model in list with 'model_name', 'litellm_params', Optional['model_info'] - None: could not find deployment in list - + Optimized with O(1) index lookup instead of O(n) linear scan. """ # O(1) lookup via model_id_to_deployment_index_map @@ -5886,11 +5979,11 @@ def _set_model_group_info( # noqa: PLR0915 configurable_clientside_auth_params = ( litellm_params.configurable_clientside_auth_params ) - + # Cache nested dict access to avoid repeated temporary dict allocations model_litellm_params = model.get("litellm_params", {}) model_info_dict = model.get("model_info", {}) - + # get model tpm _deployment_tpm: Optional[int] = None if _deployment_tpm is None: @@ -6266,12 +6359,12 @@ async def set_response_headers( def _build_model_name_index(self, model_list: list) -> None: """ Build model_name -> deployment indices mapping for O(1) lookups. - + This index allows us to find all deployments for a given model_name in O(1) time instead of O(n) linear scan through the entire model_list. """ self.model_name_to_deployment_indices.clear() - + for idx, model in enumerate(model_list): model_name = model.get("model_name") if model_name: @@ -6311,12 +6404,12 @@ def get_model_ids( if 'model_name' is none, returns all. Returns list of model id's. - + Optimized with O(1) or O(k) index lookup when model_name provided, instead of O(n) linear scan. - """ + """ ids = [] - + if model_name is not None: # O(1) lookup in model_name index, then O(k) iteration where k = deployments for this model_name if model_name in self.model_name_to_deployment_indices: @@ -6337,7 +6430,7 @@ def get_model_ids( if exclude_team_models and model["model_info"].get("team_id"): continue ids.append(model_id) - + return ids def has_model_id(self, candidate_id: str) -> bool: @@ -6399,15 +6492,15 @@ def _get_all_deployments( Used for accurate 'get_model_list'. if team_id specified, only return team-specific models - + Optimized with O(1) index lookup instead of O(n) linear scan. """ returned_models: List[DeploymentTypedDict] = [] - + # O(1) lookup in model_name index if model_name in self.model_name_to_deployment_indices: indices = self.model_name_to_deployment_indices[model_name] - + # O(k) where k = deployments for this model_name (typically 1-10) for idx in indices: model = self.model_list[idx] @@ -6556,9 +6649,7 @@ def get_model_list( potential_team_only_wildcard_models = ( self.team_pattern_routers[team_id].route(model_name) or [] ) - potential_wildcard_models.extend( - potential_team_only_wildcard_models - ) + potential_wildcard_models.extend(potential_team_only_wildcard_models) if model_name is not None and potential_wildcard_models is not None: for m in potential_wildcard_models: @@ -6821,7 +6912,7 @@ def _pre_call_checks( # noqa: PLR0915 # Cache nested dict access to avoid repeated temporary dict allocations _litellm_params = deployment.get("litellm_params", {}) _model_info = deployment.get("model_info", {}) - + # see if we have the info for this model try: base_model = _model_info.get("base_model", None) @@ -6949,7 +7040,9 @@ def _pre_call_checks( # noqa: PLR0915 if len(invalid_model_indices) > 0: # Single-pass filter using set for O(1) lookups (avoids O(n^2) from repeated pops) _returned_deployments = [ - d for i, d in enumerate(_returned_deployments) if i not in invalid_model_indices + d + for i, d in enumerate(_returned_deployments) + if i not in invalid_model_indices ] ## ORDER FILTERING ## -> if user set 'order' in deployments, return deployments with lowest order (e.g. order=1 > order=2) @@ -7505,7 +7598,8 @@ def _filter_cooldown_deployments( # Convert to set for O(1) lookup and use list comprehension for O(n) filtering cooldown_set = set(cooldown_deployments) return [ - deployment for deployment in healthy_deployments + deployment + for deployment in healthy_deployments if deployment["model_info"]["id"] not in cooldown_set ]