diff --git a/enterprise/litellm_enterprise/proxy/hooks/managed_files.py b/enterprise/litellm_enterprise/proxy/hooks/managed_files.py
index c55a4f038981..e3598d12c685 100644
--- a/enterprise/litellm_enterprise/proxy/hooks/managed_files.py
+++ b/enterprise/litellm_enterprise/proxy/hooks/managed_files.py
@@ -498,7 +498,6 @@ async def get_model_file_id_mapping(
for file_id in file_ids:
## CHECK IF FILE ID IS MANAGED BY LITELM
is_base64_unified_file_id = _is_base64_encoded_unified_file_id(file_id)
-
if is_base64_unified_file_id:
litellm_managed_file_ids.append(file_id)
@@ -509,6 +508,7 @@ async def get_model_file_id_mapping(
unified_file_object = await self.get_unified_file_id(
file_id, litellm_parent_otel_span
)
+
if unified_file_object:
file_id_mapping[file_id] = unified_file_object.model_mappings
@@ -784,18 +784,21 @@ async def afile_delete(
llm_router: Router,
**data: Dict,
) -> OpenAIFileObject:
- file_id = convert_b64_uid_to_unified_uid(file_id)
+
+ # file_id = convert_b64_uid_to_unified_uid(file_id)
model_file_id_mapping = await self.get_model_file_id_mapping(
[file_id], litellm_parent_otel_span
)
+
specific_model_file_id_mapping = model_file_id_mapping.get(file_id)
if specific_model_file_id_mapping:
- for model_id, file_id in specific_model_file_id_mapping.items():
- await llm_router.afile_delete(model=model_id, file_id=file_id, **data) # type: ignore
+ for model_id, model_file_id in specific_model_file_id_mapping.items():
+ await llm_router.afile_delete(model=model_id, file_id=model_file_id, **data) # type: ignore
stored_file_object = await self.delete_unified_file_id(
file_id, litellm_parent_otel_span
)
+
if stored_file_object:
return stored_file_object
else:
@@ -816,6 +819,7 @@ async def afile_content(
model_file_id_mapping
or await self.get_model_file_id_mapping([file_id], litellm_parent_otel_span)
)
+
specific_model_file_id_mapping = model_file_id_mapping.get(file_id)
if specific_model_file_id_mapping:
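Why the loop-variable rename in `afile_delete` matters: reusing `file_id` as the loop variable clobbered the unified id that the later `delete_unified_file_id()` call still needs. A minimal self-contained sketch of the pattern — the mapping values and the delete helper are hypothetical stand-ins, not the real router call:

```python
import asyncio


async def delete_provider_file(model_id: str, provider_file_id: str) -> None:
    # Stand-in for llm_router.afile_delete(model=model_id, file_id=provider_file_id, ...)
    print(f"deleting {provider_file_id} on deployment {model_id}")


async def main() -> None:
    file_id = "unified-123"  # the LiteLLM-managed (unified) id
    model_file_id_mapping = {"deployment-a": "file-abc", "deployment-b": "file-def"}

    # Renamed loop variable keeps `file_id` intact for the later unified delete.
    for model_id, model_file_id in model_file_id_mapping.items():
        await delete_provider_file(model_id, model_file_id)

    assert file_id == "unified-123"  # still the unified id, not a provider file id


asyncio.run(main())
```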
diff --git a/litellm/batches/main.py b/litellm/batches/main.py
index 48521e5fba01..5279dd70bc42 100644
--- a/litellm/batches/main.py
+++ b/litellm/batches/main.py
@@ -17,6 +17,8 @@
from typing import Any, Coroutine, Dict, Literal, Optional, Union, cast
import httpx
+from openai.types.batch import BatchRequestCounts
+from openai.types.batch import Metadata as BatchMetadata
import litellm
from litellm._logging import verbose_logger
@@ -223,10 +225,12 @@ def create_batch(
api_key=optional_params.api_key,
logging_obj=litellm_logging_obj,
_is_async=_is_async,
- client=client
- if client is not None
- and isinstance(client, (HTTPHandler, AsyncHTTPHandler))
- else None,
+ client=(
+ client
+ if client is not None
+ and isinstance(client, (HTTPHandler, AsyncHTTPHandler))
+ else None
+ ),
timeout=timeout,
model=model,
)
@@ -609,10 +613,12 @@ def retrieve_batch(
function_id="batch_retrieve",
),
_is_async=_is_async,
- client=client
- if client is not None
- and isinstance(client, (HTTPHandler, AsyncHTTPHandler))
- else None,
+ client=(
+ client
+ if client is not None
+ and isinstance(client, (HTTPHandler, AsyncHTTPHandler))
+ else None
+ ),
timeout=timeout,
model=model,
)
@@ -799,6 +805,7 @@ def list_batches(
async def acancel_batch(
batch_id: str,
+ model: Optional[str] = None,
custom_llm_provider: Literal["openai", "azure"] = "openai",
metadata: Optional[Dict[str, str]] = None,
extra_headers: Optional[Dict[str, str]] = None,
@@ -813,11 +820,13 @@ async def acancel_batch(
try:
loop = asyncio.get_event_loop()
kwargs["acancel_batch"] = True
+        model = model or kwargs.pop("model", None)
# Use a partial function to pass your keyword arguments
func = partial(
cancel_batch,
batch_id,
+ model,
custom_llm_provider,
metadata,
extra_headers,
@@ -840,7 +849,8 @@ async def acancel_batch(
def cancel_batch(
batch_id: str,
- custom_llm_provider: Literal["openai", "azure"] = "openai",
+ model: Optional[str] = None,
+ custom_llm_provider: Union[Literal["openai", "azure"], str] = "openai",
metadata: Optional[Dict[str, str]] = None,
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
@@ -852,6 +862,17 @@ def cancel_batch(
LiteLLM Equivalent of POST https://api.openai.com/v1/batches/{batch_id}/cancel
"""
try:
+
+ try:
+ if model is not None:
+ _, custom_llm_provider, _, _ = get_llm_provider(
+ model=model,
+ custom_llm_provider=custom_llm_provider,
+ )
+ except Exception as e:
+ verbose_logger.exception(
+ f"litellm.batches.main.py::cancel_batch() - Error inferring custom_llm_provider - {str(e)}"
+ )
optional_params = GenericLiteLLMParams(**kwargs)
litellm_params = get_litellm_params(
custom_llm_provider=custom_llm_provider,
@@ -1005,21 +1026,28 @@ async def _async_get_status():
created_at=status_response["submitTime"],
in_progress_at=status_response["lastModifiedTime"],
completed_at=status_response.get("endTime"),
- failed_at=status_response.get("endTime")
- if status_response["status"] == "failed"
- else None,
- request_counts={
- "total": 1,
- "completed": 1 if status_response["status"] == "completed" else 0,
- "failed": 1 if status_response["status"] == "failed" else 0,
- },
- metadata={
- "output_file_id": status_response["outputDataConfig"][
- "s3OutputDataConfig"
- ]["s3Uri"],
- "failure_message": status_response.get("failureMessage"),
- "model_arn": status_response["modelArn"],
- },
+ failed_at=(
+ status_response.get("endTime")
+ if status_response["status"] == "failed"
+ else None
+ ),
+ request_counts=BatchRequestCounts(
+ total=1,
+ completed=1 if status_response["status"] == "completed" else 0,
+ failed=1 if status_response["status"] == "failed" else 0,
+ ),
+ metadata=dict(
+ **{
+ "output_file_id": status_response["outputDataConfig"][
+ "s3OutputDataConfig"
+ ]["s3Uri"],
+ "failure_message": status_response.get("failureMessage") or "",
+ "model_arn": status_response["modelArn"],
+ }
+ ),
+ completion_window="24h",
+ endpoint="/v1/embeddings",
+ input_file_id="",
)
return result
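Usage sketch for the new `model` parameter on `acancel_batch` / `cancel_batch`: when `model` is supplied, the provider is inferred via `get_llm_provider`, so `custom_llm_provider` no longer has to be passed explicitly. The batch id and deployment name below are placeholders, and provider credentials are assumed to be configured in the environment:

```python
import asyncio

import litellm


async def main() -> None:
    # Provider ("azure") is inferred from the model string; without `model`,
    # the call falls back to the custom_llm_provider default ("openai").
    resp = await litellm.acancel_batch(
        batch_id="batch_abc123",                # placeholder batch id
        model="azure/gpt-4o-batch-deployment",  # placeholder deployment
    )
    print(resp.status)


asyncio.run(main())
```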
diff --git a/litellm/files/main.py b/litellm/files/main.py
index 9c85fa105653..535772fa42c0 100644
--- a/litellm/files/main.py
+++ b/litellm/files/main.py
@@ -95,7 +95,9 @@ async def acreate_file(
def create_file(
file: FileTypes,
purpose: Literal["assistants", "batch", "fine-tune"],
- custom_llm_provider: Optional[Literal["openai", "azure", "vertex_ai", "bedrock"]] = None,
+ custom_llm_provider: Optional[
+ Literal["openai", "azure", "vertex_ai", "bedrock"]
+ ] = None,
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
@@ -155,10 +157,12 @@ def create_file(
api_key=optional_params.api_key,
logging_obj=logging_obj,
_is_async=_is_async,
- client=client
- if client is not None
- and isinstance(client, (HTTPHandler, AsyncHTTPHandler))
- else None,
+ client=(
+ client
+ if client is not None
+ and isinstance(client, (HTTPHandler, AsyncHTTPHandler))
+ else None
+ ),
timeout=timeout,
)
elif custom_llm_provider == "openai":
@@ -441,12 +445,14 @@ async def afile_delete(
"""
try:
loop = asyncio.get_event_loop()
+ model = kwargs.pop("model", None)
kwargs["is_async"] = True
# Use a partial function to pass your keyword arguments
func = partial(
file_delete,
file_id,
+ model,
custom_llm_provider,
extra_headers,
extra_body,
@@ -470,7 +476,8 @@ async def afile_delete(
@client
def file_delete(
file_id: str,
- custom_llm_provider: Literal["openai", "azure"] = "openai",
+ model: Optional[str] = None,
+ custom_llm_provider: Union[Literal["openai", "azure"], str] = "openai",
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
@@ -481,6 +488,13 @@ def file_delete(
LiteLLM Equivalent of DELETE https://api.openai.com/v1/files
"""
try:
+ try:
+ if model is not None:
+ _, custom_llm_provider, _, _ = get_llm_provider(
+ model, custom_llm_provider
+ )
+ except Exception:
+ pass
optional_params = GenericLiteLLMParams(**kwargs)
litellm_params_dict = get_litellm_params(**kwargs)
### TIMEOUT LOGIC ###
@@ -566,7 +580,7 @@ def file_delete(
)
else:
raise litellm.exceptions.BadRequestError(
- message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format(
+                message="LiteLLM doesn't support {} for 'file_delete'. Only 'openai' is supported.".format(
custom_llm_provider
),
model="n/a",
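The same pattern applies to file deletion: passing `model` lets `file_delete` / `afile_delete` infer the provider before falling back to `custom_llm_provider`. A hedged sketch with placeholder ids and credentials assumed in the environment:

```python
import asyncio

import litellm


async def main() -> None:
    deleted = await litellm.afile_delete(
        file_id="file-abc123",        # placeholder provider file id
        model="azure/my-deployment",  # provider inferred as "azure"
    )
    print(deleted)


asyncio.run(main())
```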
diff --git a/litellm/proxy/_experimental/out/api-reference.html b/litellm/proxy/_experimental/out/api-reference/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/api-reference.html
rename to litellm/proxy/_experimental/out/api-reference/index.html
diff --git a/litellm/proxy/_experimental/out/guardrails.html b/litellm/proxy/_experimental/out/guardrails.html
deleted file mode 100644
index 3929c49a29bc..000000000000
--- a/litellm/proxy/_experimental/out/guardrails.html
+++ /dev/null
@@ -1 +0,0 @@
-LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/_experimental/out/logs.html b/litellm/proxy/_experimental/out/logs/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/logs.html
rename to litellm/proxy/_experimental/out/logs/index.html
diff --git a/litellm/proxy/_experimental/out/model-hub.html b/litellm/proxy/_experimental/out/model-hub/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/model-hub.html
rename to litellm/proxy/_experimental/out/model-hub/index.html
diff --git a/litellm/proxy/_experimental/out/model_hub_table.html b/litellm/proxy/_experimental/out/model_hub_table/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/model_hub_table.html
rename to litellm/proxy/_experimental/out/model_hub_table/index.html
diff --git a/litellm/proxy/_experimental/out/models-and-endpoints.html b/litellm/proxy/_experimental/out/models-and-endpoints/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/models-and-endpoints.html
rename to litellm/proxy/_experimental/out/models-and-endpoints/index.html
diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html
deleted file mode 100644
index e38fb5467311..000000000000
--- a/litellm/proxy/_experimental/out/onboarding.html
+++ /dev/null
@@ -1 +0,0 @@
-LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/_experimental/out/organizations.html b/litellm/proxy/_experimental/out/organizations/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/organizations.html
rename to litellm/proxy/_experimental/out/organizations/index.html
diff --git a/litellm/proxy/_experimental/out/teams.html b/litellm/proxy/_experimental/out/teams/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/teams.html
rename to litellm/proxy/_experimental/out/teams/index.html
diff --git a/litellm/proxy/_experimental/out/test-key.html b/litellm/proxy/_experimental/out/test-key/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/test-key.html
rename to litellm/proxy/_experimental/out/test-key/index.html
diff --git a/litellm/proxy/_experimental/out/usage.html b/litellm/proxy/_experimental/out/usage/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/usage.html
rename to litellm/proxy/_experimental/out/usage/index.html
diff --git a/litellm/proxy/_experimental/out/users.html b/litellm/proxy/_experimental/out/users/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/users.html
rename to litellm/proxy/_experimental/out/users/index.html
diff --git a/litellm/proxy/_experimental/out/virtual-keys.html b/litellm/proxy/_experimental/out/virtual-keys/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/virtual-keys.html
rename to litellm/proxy/_experimental/out/virtual-keys/index.html
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 9ef9812dd490..ff3c3e219b87 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,21 +1,8 @@
model_list:
- model_name: gpt-5-mini
litellm_params:
- model: bedrock/global.anthropic.claude-sonnet-4-5-20250929-v1:0
+ model: gpt-5-mini
- model_name: embedding-model
litellm_params:
model: openai/text-embedding-3-large
-vector_store_registry:
- - vector_store_name: "vertex-ai-litellm-website-knowledgebase"
- litellm_params:
- vector_store_id: "litellm-docs_1761094140318"
- custom_llm_provider: "vertex_ai/search_api"
- vertex_project: "test-vector-store-db"
- vertex_location: "global"
- - vector_store_name: "milvus-litellm-website-knowledgebase"
- litellm_params:
- vector_store_id: "can-be-anything"
- custom_llm_provider: "milvus"
- api_base: os.environ/MILVUS_API_BASE
- api_key: os.environ/MILVUS_API_KEY
\ No newline at end of file
diff --git a/litellm/proxy/batches_endpoints/endpoints.py b/litellm/proxy/batches_endpoints/endpoints.py
index 2fc0298d1c83..ac7082edb690 100644
--- a/litellm/proxy/batches_endpoints/endpoints.py
+++ b/litellm/proxy/batches_endpoints/endpoints.py
@@ -22,6 +22,9 @@
)
from litellm.proxy.openai_files_endpoints.common_utils import (
_is_base64_encoded_unified_file_id,
+ convert_b64_uid_to_unified_uid,
+ get_batch_id_from_unified_batch_id,
+ get_model_id_from_unified_batch_id,
get_models_from_unified_file_id,
)
from litellm.proxy.utils import handle_exception_on_proxy, is_known_model
@@ -506,6 +509,7 @@ async def cancel_batch(
from litellm.proxy.proxy_server import (
add_litellm_data_to_request,
general_settings,
+ llm_router,
proxy_config,
proxy_logging_obj,
version,
@@ -517,6 +521,7 @@ async def cancel_batch(
verbose_proxy_logger.debug(
"Request received by LiteLLM:\n{}".format(json.dumps(data, indent=4)),
)
+ unified_batch_id = _is_base64_encoded_unified_file_id(batch_id)
# Include original request and headers in the data
data = await add_litellm_data_to_request(
@@ -528,14 +533,36 @@ async def cancel_batch(
proxy_config=proxy_config,
)
- custom_llm_provider = (
- provider or data.pop("custom_llm_provider", None) or "openai"
- )
- _cancel_batch_data = CancelBatchRequest(batch_id=batch_id, **data)
- response = await litellm.acancel_batch(
- custom_llm_provider=custom_llm_provider, # type: ignore
- **_cancel_batch_data,
- )
+ if unified_batch_id:
+ if llm_router is None:
+ raise HTTPException(
+ status_code=500,
+ detail={
+ "error": "LLM Router not initialized. Ensure models added to proxy."
+ },
+ )
+
+ model = (
+ get_model_id_from_unified_batch_id(unified_batch_id)
+ if unified_batch_id
+ else None
+ )
+
+ model_batch_id = get_batch_id_from_unified_batch_id(unified_batch_id)
+
+ data["batch_id"] = model_batch_id
+
+ response = await llm_router.acancel_batch(model=model, **data) # type: ignore
+ else:
+
+ custom_llm_provider = (
+ provider or data.pop("custom_llm_provider", None) or "openai"
+ )
+ _cancel_batch_data = CancelBatchRequest(batch_id=batch_id, **data)
+ response = await litellm.acancel_batch(
+ custom_llm_provider=custom_llm_provider, # type: ignore
+ **_cancel_batch_data,
+ )
### ALERTING ###
asyncio.create_task(
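Condensed view of the new branch in the proxy's `cancel_batch` endpoint — a paraphrase of the hunk above rather than additional behavior; the wrapper name `_cancel` is hypothetical:

```python
from litellm.proxy.openai_files_endpoints.common_utils import (
    _is_base64_encoded_unified_file_id,
    get_batch_id_from_unified_batch_id,
    get_model_id_from_unified_batch_id,
)


async def _cancel(batch_id: str, llm_router, data: dict):
    unified_batch_id = _is_base64_encoded_unified_file_id(batch_id)
    if unified_batch_id:
        # LiteLLM-managed batch: recover the deployment id and the provider's
        # batch id from the unified id, then route through the Router.
        model = get_model_id_from_unified_batch_id(unified_batch_id)
        data["batch_id"] = get_batch_id_from_unified_batch_id(unified_batch_id)
        return await llm_router.acancel_batch(model=model, **data)
    ...  # unmanaged batch: provider-based litellm.acancel_batch(...) path as before
```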
diff --git a/litellm/proxy/openai_files_endpoints/files_endpoints.py b/litellm/proxy/openai_files_endpoints/files_endpoints.py
index 043b0c886e98..b0be54761364 100644
--- a/litellm/proxy/openai_files_endpoints/files_endpoints.py
+++ b/litellm/proxy/openai_files_endpoints/files_endpoints.py
@@ -31,8 +31,8 @@
from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
from litellm.proxy.common_utils.openai_endpoint_utils import (
get_custom_llm_provider_from_request_body,
- get_custom_llm_provider_from_request_query,
get_custom_llm_provider_from_request_headers,
+ get_custom_llm_provider_from_request_query,
)
from litellm.proxy.utils import ProxyLogging, is_known_model
from litellm.router import Router
@@ -788,6 +788,7 @@ async def delete_file(
param="None",
code=500,
)
+
response = await managed_files_obj.afile_delete(
file_id=file_id,
litellm_parent_otel_span=user_api_key_dict.parent_otel_span,
@@ -828,12 +829,11 @@ async def delete_file(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
- verbose_proxy_logger.error(
- "litellm.proxy.proxy_server.retrieve_file(): Exception occured - {}".format(
+ verbose_proxy_logger.exception(
+            "litellm.proxy.proxy_server.delete_file(): Exception occurred - {}".format(
str(e)
)
)
- verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
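Note on swapping `verbose_proxy_logger.error(...)` plus `debug(traceback.format_exc())` for a single `verbose_proxy_logger.exception(...)`: `logger.exception()` logs at ERROR level and appends the active traceback in one call, which is what makes the separate debug line redundant. Standard-library illustration:

```python
import logging

logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("demo")

try:
    1 / 0
except ZeroDivisionError as e:
    # Emits the message at ERROR level *and* the full traceback in one call.
    log.exception("delete_file(): Exception occurred - %s", str(e))
```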
diff --git a/litellm/router.py b/litellm/router.py
index 1489de864886..21eb894c0ccd 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -153,11 +153,7 @@
)
from litellm.types.utils import ModelInfo
from litellm.types.utils import ModelInfo as ModelMapInfo
-from litellm.types.utils import (
- ModelResponseStream,
- StandardLoggingPayload,
- Usage,
-)
+from litellm.types.utils import ModelResponseStream, StandardLoggingPayload, Usage
from litellm.utils import (
CustomStreamWrapper,
EmbeddingResponse,
@@ -357,6 +353,7 @@ def __init__( # noqa: PLR0915
self.enable_pre_call_checks = enable_pre_call_checks
self.enable_tag_filtering = enable_tag_filtering
from litellm._service_logger import ServiceLogging
+
self.service_logger_obj: ServiceLogging = ServiceLogging()
litellm.suppress_debug_info = True # prevents 'Give Feedback/Get help' message from being emitted on Router - Relevant Issue: https://github.com/BerriAI/litellm/issues/5942
if self.set_verbose is True:
@@ -375,9 +372,9 @@ def __init__( # noqa: PLR0915
) # names of models under litellm_params. ex. azure/chatgpt-v-2
self.deployment_latency_map = {}
### CACHING ###
- cache_type: Literal[
- "local", "redis", "redis-semantic", "s3", "disk"
- ] = "local" # default to an in-memory cache
+ cache_type: Literal["local", "redis", "redis-semantic", "s3", "disk"] = (
+ "local" # default to an in-memory cache
+ )
redis_cache = None
cache_config: Dict[str, Any] = {}
@@ -419,9 +416,9 @@ def __init__( # noqa: PLR0915
self.default_max_parallel_requests = default_max_parallel_requests
self.provider_default_deployment_ids: List[str] = []
self.pattern_router = PatternMatchRouter()
- self.team_pattern_routers: Dict[
- str, PatternMatchRouter
- ] = {} # {"TEAM_ID": PatternMatchRouter}
+ self.team_pattern_routers: Dict[str, PatternMatchRouter] = (
+ {}
+ ) # {"TEAM_ID": PatternMatchRouter}
self.auto_routers: Dict[str, "AutoRouter"] = {}
# Initialize model_group_alias early since it's used in set_model_list
@@ -602,9 +599,9 @@ def __init__( # noqa: PLR0915
)
)
- self.model_group_retry_policy: Optional[
- Dict[str, RetryPolicy]
- ] = model_group_retry_policy
+ self.model_group_retry_policy: Optional[Dict[str, RetryPolicy]] = (
+ model_group_retry_policy
+ )
self.allowed_fails_policy: Optional[AllowedFailsPolicy] = None
if allowed_fails_policy is not None:
@@ -708,9 +705,7 @@ def routing_strategy_init(
routing_strategy == RoutingStrategy.LEAST_BUSY.value
or routing_strategy == RoutingStrategy.LEAST_BUSY
):
- self.leastbusy_logger = LeastBusyLoggingHandler(
- router_cache=self.cache
- )
+ self.leastbusy_logger = LeastBusyLoggingHandler(router_cache=self.cache)
## add callback
if isinstance(litellm.input_callback, list):
litellm.input_callback.append(self.leastbusy_logger) # type: ignore
@@ -774,34 +769,84 @@ def initialize_assistants_endpoint(self):
def _initialize_core_endpoints(self):
"""Helper to initialize core router endpoints."""
- self.amoderation = self.factory_function(litellm.amoderation, call_type="moderation")
- self.aanthropic_messages = self.factory_function(litellm.anthropic_messages, call_type="anthropic_messages")
- self.agenerate_content = self.factory_function(litellm.agenerate_content, call_type="agenerate_content")
- self.aadapter_generate_content = self.factory_function(litellm.aadapter_generate_content, call_type="aadapter_generate_content")
- self.aresponses = self.factory_function(litellm.aresponses, call_type="aresponses")
- self.afile_delete = self.factory_function(litellm.afile_delete, call_type="afile_delete")
- self.afile_content = self.factory_function(litellm.afile_content, call_type="afile_content")
+ self.amoderation = self.factory_function(
+ litellm.amoderation, call_type="moderation"
+ )
+ self.aanthropic_messages = self.factory_function(
+ litellm.anthropic_messages, call_type="anthropic_messages"
+ )
+ self.agenerate_content = self.factory_function(
+ litellm.agenerate_content, call_type="agenerate_content"
+ )
+ self.aadapter_generate_content = self.factory_function(
+ litellm.aadapter_generate_content, call_type="aadapter_generate_content"
+ )
+ self.aresponses = self.factory_function(
+ litellm.aresponses, call_type="aresponses"
+ )
+ self.afile_delete = self.factory_function(
+ litellm.afile_delete, call_type="afile_delete"
+ )
+ self.afile_content = self.factory_function(
+ litellm.afile_content, call_type="afile_content"
+ )
self.responses = self.factory_function(litellm.responses, call_type="responses")
- self.aget_responses = self.factory_function(litellm.aget_responses, call_type="aget_responses")
- self.acancel_responses = self.factory_function(litellm.acancel_responses, call_type="acancel_responses")
- self.adelete_responses = self.factory_function(litellm.adelete_responses, call_type="adelete_responses")
- self.alist_input_items = self.factory_function(litellm.alist_input_items, call_type="alist_input_items")
- self._arealtime = self.factory_function(litellm._arealtime, call_type="_arealtime")
- self.acreate_fine_tuning_job = self.factory_function(litellm.acreate_fine_tuning_job, call_type="acreate_fine_tuning_job")
- self.acancel_fine_tuning_job = self.factory_function(litellm.acancel_fine_tuning_job, call_type="acancel_fine_tuning_job")
- self.alist_fine_tuning_jobs = self.factory_function(litellm.alist_fine_tuning_jobs, call_type="alist_fine_tuning_jobs")
- self.aretrieve_fine_tuning_job = self.factory_function(litellm.aretrieve_fine_tuning_job, call_type="aretrieve_fine_tuning_job")
- self.afile_list = self.factory_function(litellm.afile_list, call_type="alist_files")
- self.aimage_edit = self.factory_function(litellm.aimage_edit, call_type="aimage_edit")
- self.allm_passthrough_route = self.factory_function(litellm.allm_passthrough_route, call_type="allm_passthrough_route")
+ self.aget_responses = self.factory_function(
+ litellm.aget_responses, call_type="aget_responses"
+ )
+ self.acancel_responses = self.factory_function(
+ litellm.acancel_responses, call_type="acancel_responses"
+ )
+ self.adelete_responses = self.factory_function(
+ litellm.adelete_responses, call_type="adelete_responses"
+ )
+ self.alist_input_items = self.factory_function(
+ litellm.alist_input_items, call_type="alist_input_items"
+ )
+ self._arealtime = self.factory_function(
+ litellm._arealtime, call_type="_arealtime"
+ )
+ self.acreate_fine_tuning_job = self.factory_function(
+ litellm.acreate_fine_tuning_job, call_type="acreate_fine_tuning_job"
+ )
+ self.acancel_fine_tuning_job = self.factory_function(
+ litellm.acancel_fine_tuning_job, call_type="acancel_fine_tuning_job"
+ )
+ self.alist_fine_tuning_jobs = self.factory_function(
+ litellm.alist_fine_tuning_jobs, call_type="alist_fine_tuning_jobs"
+ )
+ self.aretrieve_fine_tuning_job = self.factory_function(
+ litellm.aretrieve_fine_tuning_job, call_type="aretrieve_fine_tuning_job"
+ )
+ self.afile_list = self.factory_function(
+ litellm.afile_list, call_type="alist_files"
+ )
+ self.aimage_edit = self.factory_function(
+ litellm.aimage_edit, call_type="aimage_edit"
+ )
+ self.allm_passthrough_route = self.factory_function(
+ litellm.allm_passthrough_route, call_type="allm_passthrough_route"
+ )
+ self.acancel_batch = self.factory_function(
+ litellm.acancel_batch, call_type="acancel_batch"
+ )
def _initialize_specialized_endpoints(self):
"""Helper to initialize specialized router endpoints (vector store, OCR, search, video, container)."""
from litellm.vector_stores.main import acreate, asearch, create, search
- self.avector_store_search = self.factory_function(asearch, call_type="avector_store_search")
- self.avector_store_create = self.factory_function(acreate, call_type="avector_store_create")
- self.vector_store_search = self.factory_function(search, call_type="vector_store_search")
- self.vector_store_create = self.factory_function(create, call_type="vector_store_create")
+
+ self.avector_store_search = self.factory_function(
+ asearch, call_type="avector_store_search"
+ )
+ self.avector_store_create = self.factory_function(
+ acreate, call_type="avector_store_create"
+ )
+ self.vector_store_search = self.factory_function(
+ search, call_type="vector_store_search"
+ )
+ self.vector_store_create = self.factory_function(
+ create, call_type="vector_store_create"
+ )
from litellm.google_genai import (
agenerate_content,
@@ -809,16 +854,27 @@ def _initialize_specialized_endpoints(self):
generate_content,
generate_content_stream,
)
- self.agenerate_content = self.factory_function(agenerate_content, call_type="agenerate_content")
- self.generate_content = self.factory_function(generate_content, call_type="generate_content")
- self.agenerate_content_stream = self.factory_function(agenerate_content_stream, call_type="agenerate_content_stream")
- self.generate_content_stream = self.factory_function(generate_content_stream, call_type="generate_content_stream")
+
+ self.agenerate_content = self.factory_function(
+ agenerate_content, call_type="agenerate_content"
+ )
+ self.generate_content = self.factory_function(
+ generate_content, call_type="generate_content"
+ )
+ self.agenerate_content_stream = self.factory_function(
+ agenerate_content_stream, call_type="agenerate_content_stream"
+ )
+ self.generate_content_stream = self.factory_function(
+ generate_content_stream, call_type="generate_content_stream"
+ )
from litellm.ocr import aocr, ocr
+
self.aocr = self.factory_function(aocr, call_type="aocr")
self.ocr = self.factory_function(ocr, call_type="ocr")
from litellm.search import asearch, search
+
self.asearch = self.factory_function(asearch, call_type="asearch")
self.search = self.factory_function(search, call_type="search")
@@ -834,15 +890,30 @@ def _initialize_specialized_endpoints(self):
video_remix,
video_status,
)
- self.avideo_generation = self.factory_function(avideo_generation, call_type="avideo_generation")
- self.video_generation = self.factory_function(video_generation, call_type="video_generation")
+
+ self.avideo_generation = self.factory_function(
+ avideo_generation, call_type="avideo_generation"
+ )
+ self.video_generation = self.factory_function(
+ video_generation, call_type="video_generation"
+ )
self.avideo_list = self.factory_function(avideo_list, call_type="avideo_list")
self.video_list = self.factory_function(video_list, call_type="video_list")
- self.avideo_status = self.factory_function(avideo_status, call_type="avideo_status")
- self.video_status = self.factory_function(video_status, call_type="video_status")
- self.avideo_content = self.factory_function(avideo_content, call_type="avideo_content")
- self.video_content = self.factory_function(video_content, call_type="video_content")
- self.avideo_remix = self.factory_function(avideo_remix, call_type="avideo_remix")
+ self.avideo_status = self.factory_function(
+ avideo_status, call_type="avideo_status"
+ )
+ self.video_status = self.factory_function(
+ video_status, call_type="video_status"
+ )
+ self.avideo_content = self.factory_function(
+ avideo_content, call_type="avideo_content"
+ )
+ self.video_content = self.factory_function(
+ video_content, call_type="video_content"
+ )
+ self.avideo_remix = self.factory_function(
+ avideo_remix, call_type="avideo_remix"
+ )
self.video_remix = self.factory_function(video_remix, call_type="video_remix")
from litellm.containers import (
@@ -855,14 +926,31 @@ def _initialize_specialized_endpoints(self):
list_containers,
retrieve_container,
)
- self.acreate_container = self.factory_function(acreate_container, call_type="acreate_container")
- self.create_container = self.factory_function(create_container, call_type="create_container")
- self.alist_containers = self.factory_function(alist_containers, call_type="alist_containers")
- self.list_containers = self.factory_function(list_containers, call_type="list_containers")
- self.aretrieve_container = self.factory_function(aretrieve_container, call_type="aretrieve_container")
- self.retrieve_container = self.factory_function(retrieve_container, call_type="retrieve_container")
- self.adelete_container = self.factory_function(adelete_container, call_type="adelete_container")
- self.delete_container = self.factory_function(delete_container, call_type="delete_container")
+
+ self.acreate_container = self.factory_function(
+ acreate_container, call_type="acreate_container"
+ )
+ self.create_container = self.factory_function(
+ create_container, call_type="create_container"
+ )
+ self.alist_containers = self.factory_function(
+ alist_containers, call_type="alist_containers"
+ )
+ self.list_containers = self.factory_function(
+ list_containers, call_type="list_containers"
+ )
+ self.aretrieve_container = self.factory_function(
+ aretrieve_container, call_type="aretrieve_container"
+ )
+ self.retrieve_container = self.factory_function(
+ retrieve_container, call_type="retrieve_container"
+ )
+ self.adelete_container = self.factory_function(
+ adelete_container, call_type="adelete_container"
+ )
+ self.delete_container = self.factory_function(
+ delete_container, call_type="delete_container"
+ )
def initialize_router_endpoints(self):
self._initialize_core_endpoints()
@@ -1226,7 +1314,10 @@ async def stream_with_fallbacks():
async def _acompletion(
self, model: str, messages: List[Dict[str, str]], **kwargs
- ) -> Union[ModelResponse, CustomStreamWrapper,]:
+ ) -> Union[
+ ModelResponse,
+ CustomStreamWrapper,
+ ]:
"""
- Get an available deployment
- call it with a semaphore over the call
@@ -2694,21 +2785,19 @@ async def _aadapter_completion(self, adapter_id: str, model: str, **kwargs):
self.fail_calls[model] += 1
raise e
- async def _asearch_with_fallbacks(
- self, original_function: Callable, **kwargs
- ):
+ async def _asearch_with_fallbacks(self, original_function: Callable, **kwargs):
"""
Helper function to make a search API call through the router with load balancing and fallbacks.
Reuses the router's retry/fallback infrastructure.
"""
from litellm.router_utils.search_api_router import SearchAPIRouter
-
+
return await SearchAPIRouter.async_search_with_fallbacks(
router_instance=self,
original_function=original_function,
**kwargs,
)
-
+
async def _asearch_with_fallbacks_helper(
self, model: str, original_generic_function: Callable, **kwargs
):
@@ -2717,7 +2806,7 @@ async def _asearch_with_fallbacks_helper(
Called by async_function_with_fallbacks for each retry attempt.
"""
from litellm.router_utils.search_api_router import SearchAPIRouter
-
+
return await SearchAPIRouter.async_search_with_fallbacks_helper(
router_instance=self,
model=model,
@@ -2755,11 +2844,9 @@ async def _ageneric_api_call_with_fallbacks(
)
)
raise e
-
+
def _add_deployment_model_to_endpoint_for_llm_passthrough_route(
- self, kwargs: Dict[str, Any],
- model: str,
- model_name: str
+ self, kwargs: Dict[str, Any], model: str, model_name: str
) -> Dict[str, Any]:
"""
Add the deployment model to the endpoint for LLM passthrough route.
@@ -2771,7 +2858,7 @@ def _add_deployment_model_to_endpoint_for_llm_passthrough_route(
# For provider-specific endpoints, strip the provider prefix from model_name
# e.g., "bedrock/us.anthropic.claude-3-5-sonnet-20240620-v1:0" -> "us.anthropic.claude-3-5-sonnet-20240620-v1:0"
from litellm import get_llm_provider
-
+
try:
# get_llm_provider returns (model_without_prefix, provider, api_key, api_base)
stripped_model_name, _, _, _ = get_llm_provider(
@@ -2783,8 +2870,10 @@ def _add_deployment_model_to_endpoint_for_llm_passthrough_route(
except Exception:
# If get_llm_provider fails, fall back to using model_name as-is
replacement_model_name = model_name
-
- kwargs["endpoint"] = kwargs["endpoint"].replace(model, replacement_model_name)
+
+ kwargs["endpoint"] = kwargs["endpoint"].replace(
+ model, replacement_model_name
+ )
return kwargs
async def _ageneric_api_call_with_fallbacks_helper(
@@ -2818,7 +2907,9 @@ async def _ageneric_api_call_with_fallbacks_helper(
model_name = data["model"]
self.total_calls[model_name] += 1
- self._add_deployment_model_to_endpoint_for_llm_passthrough_route(kwargs=kwargs, model=model, model_name=model_name)
+ self._add_deployment_model_to_endpoint_for_llm_passthrough_route(
+ kwargs=kwargs, model=model, model_name=model_name
+ )
### get custom
response = original_generic_function(
**{
@@ -3247,9 +3338,9 @@ async def create_file_for_deployment(deployment: dict) -> OpenAIFileObject:
healthy_deployments=healthy_deployments, responses=responses
)
returned_response = cast(OpenAIFileObject, responses[0])
- returned_response._hidden_params[
- "model_file_id_mapping"
- ] = model_file_id_mapping
+ returned_response._hidden_params["model_file_id_mapping"] = (
+ model_file_id_mapping
+ )
return returned_response
except Exception as e:
verbose_router_logger.exception(
@@ -3582,6 +3673,7 @@ def factory_function(
"afile_delete",
"afile_content",
"_arealtime",
+ "acancel_batch",
"acreate_fine_tuning_job",
"acancel_fine_tuning_job",
"alist_fine_tuning_jobs",
@@ -3620,7 +3712,7 @@ def factory_function(
"aretrieve_container",
"retrieve_container",
"adelete_container",
- "delete_container"
+ "delete_container",
] = "assistants",
):
"""
@@ -3706,6 +3798,7 @@ async def async_wrapper(
"alist_containers",
"aretrieve_container",
"adelete_container",
+ "acancel_batch",
):
return await self._ageneric_api_call_with_fallbacks(
original_function=original_function,
@@ -3862,11 +3955,11 @@ async def async_function_with_fallbacks_common_utils( # noqa: PLR0915
if isinstance(e, litellm.ContextWindowExceededError):
if context_window_fallbacks is not None:
- context_window_fallback_model_group: Optional[
- List[str]
- ] = self._get_fallback_model_group_from_fallbacks(
- fallbacks=context_window_fallbacks,
- model_group=model_group,
+ context_window_fallback_model_group: Optional[List[str]] = (
+ self._get_fallback_model_group_from_fallbacks(
+ fallbacks=context_window_fallbacks,
+ model_group=model_group,
+ )
)
if context_window_fallback_model_group is None:
raise original_exception
@@ -3898,11 +3991,11 @@ async def async_function_with_fallbacks_common_utils( # noqa: PLR0915
e.message += "\n{}".format(error_message)
elif isinstance(e, litellm.ContentPolicyViolationError):
if content_policy_fallbacks is not None:
- content_policy_fallback_model_group: Optional[
- List[str]
- ] = self._get_fallback_model_group_from_fallbacks(
- fallbacks=content_policy_fallbacks,
- model_group=model_group,
+ content_policy_fallback_model_group: Optional[List[str]] = (
+ self._get_fallback_model_group_from_fallbacks(
+ fallbacks=content_policy_fallbacks,
+ model_group=model_group,
+ )
)
if content_policy_fallback_model_group is None:
raise original_exception
@@ -4620,7 +4713,7 @@ def deployment_callback_on_failure(
try:
exception = kwargs.get("exception", None)
exception_status = getattr(exception, "status_code", "")
-
+
# Cache litellm_params to avoid repeated dict lookups
litellm_params = kwargs.get("litellm_params", {})
_model_info = litellm_params.get("model_info", {})
@@ -5144,26 +5237,26 @@ def init_auto_router_deployment(self, deployment: Deployment):
"""
from litellm.router_strategy.auto_router.auto_router import AutoRouter
- auto_router_config_path: Optional[
- str
- ] = deployment.litellm_params.auto_router_config_path
+ auto_router_config_path: Optional[str] = (
+ deployment.litellm_params.auto_router_config_path
+ )
auto_router_config: Optional[str] = deployment.litellm_params.auto_router_config
if auto_router_config_path is None and auto_router_config is None:
raise ValueError(
"auto_router_config_path or auto_router_config is required for auto-router deployments. Please set it in the litellm_params"
)
- default_model: Optional[
- str
- ] = deployment.litellm_params.auto_router_default_model
+ default_model: Optional[str] = (
+ deployment.litellm_params.auto_router_default_model
+ )
if default_model is None:
raise ValueError(
"auto_router_default_model is required for auto-router deployments. Please set it in the litellm_params"
)
- embedding_model: Optional[
- str
- ] = deployment.litellm_params.auto_router_embedding_model
+ embedding_model: Optional[str] = (
+ deployment.litellm_params.auto_router_embedding_model
+ )
if embedding_model is None:
raise ValueError(
"auto_router_embedding_model is required for auto-router deployments. Please set it in the litellm_params"
@@ -5269,7 +5362,7 @@ def set_model_list(self, model_list: list):
f"\nInitialized Model List {self.get_model_names()}"
)
self.model_names = {m["model_name"] for m in model_list}
-
+
# Note: model_name_to_deployment_indices is already built incrementally
# by _create_deployment -> _add_model_to_list_and_index_map
@@ -5494,13 +5587,13 @@ def _update_deployment_indices_after_removal(
# Remove the deleted model from index
if model_id in self.model_id_to_deployment_index_map:
del self.model_id_to_deployment_index_map[model_id]
-
+
# Update model_name_to_deployment_indices
for model_name, indices in list(self.model_name_to_deployment_indices.items()):
# Remove the deleted index
if removal_idx in indices:
indices.remove(removal_idx)
-
+
# Decrement all indices greater than removal_idx
updated_indices = []
for idx in indices:
@@ -5508,7 +5601,7 @@ def _update_deployment_indices_after_removal(
updated_indices.append(idx - 1)
else:
updated_indices.append(idx)
-
+
# Update or remove the entry
if len(updated_indices) > 0:
self.model_name_to_deployment_indices[model_name] = updated_indices
@@ -5527,13 +5620,13 @@ def _add_model_to_list_and_index_map(
"""
idx = len(self.model_list)
self.model_list.append(model)
-
+
# Update model_id index for O(1) lookup
if model_id is not None:
self.model_id_to_deployment_index_map[model_id] = idx
elif model.get("model_info", {}).get("id") is not None:
self.model_id_to_deployment_index_map[model["model_info"]["id"]] = idx
-
+
# Update model_name index for O(1) lookup
model_name = model.get("model_name")
if model_name:
@@ -5653,7 +5746,7 @@ def get_deployment_by_model_group_name(
Returns -> Deployment or None
Raise Exception -> if model found in invalid format
-
+
Optimized with O(1) index lookup instead of O(n) linear scan.
"""
# O(1) lookup in model_name index
@@ -5771,7 +5864,7 @@ def get_model_info(self, id: str) -> Optional[dict]:
Returns
- dict: the model in list with 'model_name', 'litellm_params', Optional['model_info']
- None: could not find deployment in list
-
+
Optimized with O(1) index lookup instead of O(n) linear scan.
"""
# O(1) lookup via model_id_to_deployment_index_map
@@ -5886,11 +5979,11 @@ def _set_model_group_info( # noqa: PLR0915
configurable_clientside_auth_params = (
litellm_params.configurable_clientside_auth_params
)
-
+
# Cache nested dict access to avoid repeated temporary dict allocations
model_litellm_params = model.get("litellm_params", {})
model_info_dict = model.get("model_info", {})
-
+
# get model tpm
_deployment_tpm: Optional[int] = None
if _deployment_tpm is None:
@@ -6266,12 +6359,12 @@ async def set_response_headers(
def _build_model_name_index(self, model_list: list) -> None:
"""
Build model_name -> deployment indices mapping for O(1) lookups.
-
+
This index allows us to find all deployments for a given model_name in O(1) time
instead of O(n) linear scan through the entire model_list.
"""
self.model_name_to_deployment_indices.clear()
-
+
for idx, model in enumerate(model_list):
model_name = model.get("model_name")
if model_name:
@@ -6311,12 +6404,12 @@ def get_model_ids(
if 'model_name' is none, returns all.
Returns list of model id's.
-
+
Optimized with O(1) or O(k) index lookup when model_name provided,
instead of O(n) linear scan.
- """
+ """
ids = []
-
+
if model_name is not None:
# O(1) lookup in model_name index, then O(k) iteration where k = deployments for this model_name
if model_name in self.model_name_to_deployment_indices:
@@ -6337,7 +6430,7 @@ def get_model_ids(
if exclude_team_models and model["model_info"].get("team_id"):
continue
ids.append(model_id)
-
+
return ids
def has_model_id(self, candidate_id: str) -> bool:
@@ -6399,15 +6492,15 @@ def _get_all_deployments(
Used for accurate 'get_model_list'.
if team_id specified, only return team-specific models
-
+
Optimized with O(1) index lookup instead of O(n) linear scan.
"""
returned_models: List[DeploymentTypedDict] = []
-
+
# O(1) lookup in model_name index
if model_name in self.model_name_to_deployment_indices:
indices = self.model_name_to_deployment_indices[model_name]
-
+
# O(k) where k = deployments for this model_name (typically 1-10)
for idx in indices:
model = self.model_list[idx]
@@ -6556,9 +6649,7 @@ def get_model_list(
potential_team_only_wildcard_models = (
self.team_pattern_routers[team_id].route(model_name) or []
)
- potential_wildcard_models.extend(
- potential_team_only_wildcard_models
- )
+ potential_wildcard_models.extend(potential_team_only_wildcard_models)
if model_name is not None and potential_wildcard_models is not None:
for m in potential_wildcard_models:
@@ -6821,7 +6912,7 @@ def _pre_call_checks( # noqa: PLR0915
# Cache nested dict access to avoid repeated temporary dict allocations
_litellm_params = deployment.get("litellm_params", {})
_model_info = deployment.get("model_info", {})
-
+
# see if we have the info for this model
try:
base_model = _model_info.get("base_model", None)
@@ -6949,7 +7040,9 @@ def _pre_call_checks( # noqa: PLR0915
if len(invalid_model_indices) > 0:
# Single-pass filter using set for O(1) lookups (avoids O(n^2) from repeated pops)
_returned_deployments = [
- d for i, d in enumerate(_returned_deployments) if i not in invalid_model_indices
+ d
+ for i, d in enumerate(_returned_deployments)
+ if i not in invalid_model_indices
]
## ORDER FILTERING ## -> if user set 'order' in deployments, return deployments with lowest order (e.g. order=1 > order=2)
@@ -7505,7 +7598,8 @@ def _filter_cooldown_deployments(
# Convert to set for O(1) lookup and use list comprehension for O(n) filtering
cooldown_set = set(cooldown_deployments)
return [
- deployment for deployment in healthy_deployments
+ deployment
+ for deployment in healthy_deployments
if deployment["model_info"]["id"] not in cooldown_set
]
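With `acancel_batch` registered through `factory_function` and added to the generic-call allowlist, batch cancellation now flows through the Router's load-balancing/fallback path like the other generic endpoints. A hedged usage sketch — deployment settings and batch id are placeholders:

```python
import asyncio

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "batch-model",  # alias used by callers
            "litellm_params": {
                "model": "azure/gpt-4o-batch",                   # placeholder deployment
                "api_key": "sk-...",                             # placeholder credentials
                "api_base": "https://example.openai.azure.com",  # placeholder endpoint
            },
        }
    ]
)


async def main() -> None:
    resp = await router.acancel_batch(model="batch-model", batch_id="batch_abc123")
    print(resp.status)


asyncio.run(main())
```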