diff --git a/enterprise/litellm_enterprise/proxy/hooks/managed_files.py b/enterprise/litellm_enterprise/proxy/hooks/managed_files.py
index c55a4f038981..e3598d12c685 100644
--- a/enterprise/litellm_enterprise/proxy/hooks/managed_files.py
+++ b/enterprise/litellm_enterprise/proxy/hooks/managed_files.py
@@ -498,7 +498,6 @@ async def get_model_file_id_mapping(
for file_id in file_ids:
## CHECK IF FILE ID IS MANAGED BY LITELM
is_base64_unified_file_id = _is_base64_encoded_unified_file_id(file_id)
-
if is_base64_unified_file_id:
litellm_managed_file_ids.append(file_id)
@@ -509,6 +508,7 @@ async def get_model_file_id_mapping(
unified_file_object = await self.get_unified_file_id(
file_id, litellm_parent_otel_span
)
+
if unified_file_object:
file_id_mapping[file_id] = unified_file_object.model_mappings
@@ -784,18 +784,21 @@ async def afile_delete(
llm_router: Router,
**data: Dict,
) -> OpenAIFileObject:
- file_id = convert_b64_uid_to_unified_uid(file_id)
+
+ # file_id = convert_b64_uid_to_unified_uid(file_id)
model_file_id_mapping = await self.get_model_file_id_mapping(
[file_id], litellm_parent_otel_span
)
+
specific_model_file_id_mapping = model_file_id_mapping.get(file_id)
if specific_model_file_id_mapping:
- for model_id, file_id in specific_model_file_id_mapping.items():
- await llm_router.afile_delete(model=model_id, file_id=file_id, **data) # type: ignore
+ for model_id, model_file_id in specific_model_file_id_mapping.items():
+ await llm_router.afile_delete(model=model_id, file_id=model_file_id, **data) # type: ignore
stored_file_object = await self.delete_unified_file_id(
file_id, litellm_parent_otel_span
)
+
if stored_file_object:
return stored_file_object
else:
@@ -816,6 +819,7 @@ async def afile_content(
model_file_id_mapping
or await self.get_model_file_id_mapping([file_id], litellm_parent_otel_span)
)
+
specific_model_file_id_mapping = model_file_id_mapping.get(file_id)
if specific_model_file_id_mapping:
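Why the loop-variable rename in `afile_delete` matters: reusing `file_id` as the loop variable clobbered the unified id that the later `delete_unified_file_id()` call still needs. A minimal self-contained sketch of the pattern — the mapping values and the delete helper are hypothetical stand-ins, not the real router call:

```python
import asyncio


async def delete_provider_file(model_id: str, provider_file_id: str) -> None:
    # Stand-in for llm_router.afile_delete(model=model_id, file_id=provider_file_id, ...)
    print(f"deleting {provider_file_id} on deployment {model_id}")


async def main() -> None:
    file_id = "unified-123"  # the LiteLLM-managed (unified) id
    model_file_id_mapping = {"deployment-a": "file-abc", "deployment-b": "file-def"}

    # Renamed loop variable keeps `file_id` intact for the later unified delete.
    for model_id, model_file_id in model_file_id_mapping.items():
        await delete_provider_file(model_id, model_file_id)

    assert file_id == "unified-123"  # still the unified id, not a provider file id


asyncio.run(main())
```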
diff --git a/litellm/batches/main.py b/litellm/batches/main.py
index 48521e5fba01..5279dd70bc42 100644
--- a/litellm/batches/main.py
+++ b/litellm/batches/main.py
@@ -17,6 +17,8 @@
from typing import Any, Coroutine, Dict, Literal, Optional, Union, cast
import httpx
+from openai.types.batch import BatchRequestCounts
+from openai.types.batch import Metadata as BatchMetadata
import litellm
from litellm._logging import verbose_logger
@@ -223,10 +225,12 @@ def create_batch(
api_key=optional_params.api_key,
logging_obj=litellm_logging_obj,
_is_async=_is_async,
- client=client
- if client is not None
- and isinstance(client, (HTTPHandler, AsyncHTTPHandler))
- else None,
+ client=(
+ client
+ if client is not None
+ and isinstance(client, (HTTPHandler, AsyncHTTPHandler))
+ else None
+ ),
timeout=timeout,
model=model,
)
@@ -609,10 +613,12 @@ def retrieve_batch(
function_id="batch_retrieve",
),
_is_async=_is_async,
- client=client
- if client is not None
- and isinstance(client, (HTTPHandler, AsyncHTTPHandler))
- else None,
+ client=(
+ client
+ if client is not None
+ and isinstance(client, (HTTPHandler, AsyncHTTPHandler))
+ else None
+ ),
timeout=timeout,
model=model,
)
@@ -799,6 +805,7 @@ def list_batches(
async def acancel_batch(
batch_id: str,
+ model: Optional[str] = None,
custom_llm_provider: Literal["openai", "azure"] = "openai",
metadata: Optional[Dict[str, str]] = None,
extra_headers: Optional[Dict[str, str]] = None,
@@ -813,11 +820,13 @@ async def acancel_batch(
try:
loop = asyncio.get_event_loop()
kwargs["acancel_batch"] = True
+        model = model or kwargs.pop("model", None)
# Use a partial function to pass your keyword arguments
func = partial(
cancel_batch,
batch_id,
+ model,
custom_llm_provider,
metadata,
extra_headers,
@@ -840,7 +849,8 @@ async def acancel_batch(
def cancel_batch(
batch_id: str,
- custom_llm_provider: Literal["openai", "azure"] = "openai",
+ model: Optional[str] = None,
+ custom_llm_provider: Union[Literal["openai", "azure"], str] = "openai",
metadata: Optional[Dict[str, str]] = None,
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
@@ -852,6 +862,17 @@ def cancel_batch(
LiteLLM Equivalent of POST https://api.openai.com/v1/batches/{batch_id}/cancel
"""
try:
+
+ try:
+ if model is not None:
+ _, custom_llm_provider, _, _ = get_llm_provider(
+ model=model,
+ custom_llm_provider=custom_llm_provider,
+ )
+ except Exception as e:
+ verbose_logger.exception(
+ f"litellm.batches.main.py::cancel_batch() - Error inferring custom_llm_provider - {str(e)}"
+ )
optional_params = GenericLiteLLMParams(**kwargs)
litellm_params = get_litellm_params(
custom_llm_provider=custom_llm_provider,
@@ -1005,21 +1026,28 @@ async def _async_get_status():
created_at=status_response["submitTime"],
in_progress_at=status_response["lastModifiedTime"],
completed_at=status_response.get("endTime"),
- failed_at=status_response.get("endTime")
- if status_response["status"] == "failed"
- else None,
- request_counts={
- "total": 1,
- "completed": 1 if status_response["status"] == "completed" else 0,
- "failed": 1 if status_response["status"] == "failed" else 0,
- },
- metadata={
- "output_file_id": status_response["outputDataConfig"][
- "s3OutputDataConfig"
- ]["s3Uri"],
- "failure_message": status_response.get("failureMessage"),
- "model_arn": status_response["modelArn"],
- },
+ failed_at=(
+ status_response.get("endTime")
+ if status_response["status"] == "failed"
+ else None
+ ),
+ request_counts=BatchRequestCounts(
+ total=1,
+ completed=1 if status_response["status"] == "completed" else 0,
+ failed=1 if status_response["status"] == "failed" else 0,
+ ),
+ metadata=dict(
+ **{
+ "output_file_id": status_response["outputDataConfig"][
+ "s3OutputDataConfig"
+ ]["s3Uri"],
+ "failure_message": status_response.get("failureMessage") or "",
+ "model_arn": status_response["modelArn"],
+ }
+ ),
+ completion_window="24h",
+ endpoint="/v1/embeddings",
+ input_file_id="",
)
return result
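Usage sketch for the new `model` parameter on `acancel_batch` / `cancel_batch`: when `model` is supplied, the provider is inferred via `get_llm_provider`, so `custom_llm_provider` no longer has to be passed explicitly. The batch id and deployment name below are placeholders, and provider credentials are assumed to be configured in the environment:

```python
import asyncio

import litellm


async def main() -> None:
    # Provider ("azure") is inferred from the model string; without `model`,
    # the call falls back to the custom_llm_provider default ("openai").
    resp = await litellm.acancel_batch(
        batch_id="batch_abc123",                # placeholder batch id
        model="azure/gpt-4o-batch-deployment",  # placeholder deployment
    )
    print(resp.status)


asyncio.run(main())
```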
diff --git a/litellm/files/main.py b/litellm/files/main.py
index 9c85fa105653..535772fa42c0 100644
--- a/litellm/files/main.py
+++ b/litellm/files/main.py
@@ -95,7 +95,9 @@ async def acreate_file(
def create_file(
file: FileTypes,
purpose: Literal["assistants", "batch", "fine-tune"],
- custom_llm_provider: Optional[Literal["openai", "azure", "vertex_ai", "bedrock"]] = None,
+ custom_llm_provider: Optional[
+ Literal["openai", "azure", "vertex_ai", "bedrock"]
+ ] = None,
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
@@ -155,10 +157,12 @@ def create_file(
api_key=optional_params.api_key,
logging_obj=logging_obj,
_is_async=_is_async,
- client=client
- if client is not None
- and isinstance(client, (HTTPHandler, AsyncHTTPHandler))
- else None,
+ client=(
+ client
+ if client is not None
+ and isinstance(client, (HTTPHandler, AsyncHTTPHandler))
+ else None
+ ),
timeout=timeout,
)
elif custom_llm_provider == "openai":
@@ -441,12 +445,14 @@ async def afile_delete(
"""
try:
loop = asyncio.get_event_loop()
+ model = kwargs.pop("model", None)
kwargs["is_async"] = True
# Use a partial function to pass your keyword arguments
func = partial(
file_delete,
file_id,
+ model,
custom_llm_provider,
extra_headers,
extra_body,
@@ -470,7 +476,8 @@ async def afile_delete(
@client
def file_delete(
file_id: str,
- custom_llm_provider: Literal["openai", "azure"] = "openai",
+ model: Optional[str] = None,
+ custom_llm_provider: Union[Literal["openai", "azure"], str] = "openai",
extra_headers: Optional[Dict[str, str]] = None,
extra_body: Optional[Dict[str, str]] = None,
**kwargs,
@@ -481,6 +488,13 @@ def file_delete(
LiteLLM Equivalent of DELETE https://api.openai.com/v1/files
"""
try:
+ try:
+ if model is not None:
+ _, custom_llm_provider, _, _ = get_llm_provider(
+ model, custom_llm_provider
+ )
+ except Exception:
+ pass
optional_params = GenericLiteLLMParams(**kwargs)
litellm_params_dict = get_litellm_params(**kwargs)
### TIMEOUT LOGIC ###
@@ -566,7 +580,7 @@ def file_delete(
)
else:
raise litellm.exceptions.BadRequestError(
- message="LiteLLM doesn't support {} for 'create_batch'. Only 'openai' is supported.".format(
+                message="LiteLLM doesn't support {} for 'file_delete'. Only 'openai' is supported.".format(
custom_llm_provider
),
model="n/a",
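The same pattern applies to file deletion: passing `model` lets `file_delete` / `afile_delete` infer the provider before falling back to `custom_llm_provider`. A hedged sketch with placeholder ids and credentials assumed in the environment:

```python
import asyncio

import litellm


async def main() -> None:
    deleted = await litellm.afile_delete(
        file_id="file-abc123",        # placeholder provider file id
        model="azure/my-deployment",  # provider inferred as "azure"
    )
    print(deleted)


asyncio.run(main())
```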
diff --git a/litellm/proxy/_experimental/out/api-reference.html b/litellm/proxy/_experimental/out/api-reference/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/api-reference.html
rename to litellm/proxy/_experimental/out/api-reference/index.html
diff --git a/litellm/proxy/_experimental/out/guardrails.html b/litellm/proxy/_experimental/out/guardrails.html
deleted file mode 100644
index 3929c49a29bc..000000000000
--- a/litellm/proxy/_experimental/out/guardrails.html
+++ /dev/null
@@ -1 +0,0 @@
-LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/_experimental/out/logs.html b/litellm/proxy/_experimental/out/logs/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/logs.html
rename to litellm/proxy/_experimental/out/logs/index.html
diff --git a/litellm/proxy/_experimental/out/model-hub.html b/litellm/proxy/_experimental/out/model-hub/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/model-hub.html
rename to litellm/proxy/_experimental/out/model-hub/index.html
diff --git a/litellm/proxy/_experimental/out/model_hub_table.html b/litellm/proxy/_experimental/out/model_hub_table/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/model_hub_table.html
rename to litellm/proxy/_experimental/out/model_hub_table/index.html
diff --git a/litellm/proxy/_experimental/out/models-and-endpoints.html b/litellm/proxy/_experimental/out/models-and-endpoints/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/models-and-endpoints.html
rename to litellm/proxy/_experimental/out/models-and-endpoints/index.html
diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html
deleted file mode 100644
index e38fb5467311..000000000000
--- a/litellm/proxy/_experimental/out/onboarding.html
+++ /dev/null
@@ -1 +0,0 @@
-LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/_experimental/out/organizations.html b/litellm/proxy/_experimental/out/organizations/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/organizations.html
rename to litellm/proxy/_experimental/out/organizations/index.html
diff --git a/litellm/proxy/_experimental/out/teams.html b/litellm/proxy/_experimental/out/teams/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/teams.html
rename to litellm/proxy/_experimental/out/teams/index.html
diff --git a/litellm/proxy/_experimental/out/test-key.html b/litellm/proxy/_experimental/out/test-key/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/test-key.html
rename to litellm/proxy/_experimental/out/test-key/index.html
diff --git a/litellm/proxy/_experimental/out/usage.html b/litellm/proxy/_experimental/out/usage/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/usage.html
rename to litellm/proxy/_experimental/out/usage/index.html
diff --git a/litellm/proxy/_experimental/out/users.html b/litellm/proxy/_experimental/out/users/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/users.html
rename to litellm/proxy/_experimental/out/users/index.html
diff --git a/litellm/proxy/_experimental/out/virtual-keys.html b/litellm/proxy/_experimental/out/virtual-keys/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/virtual-keys.html
rename to litellm/proxy/_experimental/out/virtual-keys/index.html
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 9ef9812dd490..ff3c3e219b87 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,21 +1,8 @@
model_list:
- model_name: gpt-5-mini
litellm_params:
- model: bedrock/global.anthropic.claude-sonnet-4-5-20250929-v1:0
+ model: gpt-5-mini
- model_name: embedding-model
litellm_params:
model: openai/text-embedding-3-large
-vector_store_registry:
- - vector_store_name: "vertex-ai-litellm-website-knowledgebase"
- litellm_params:
- vector_store_id: "litellm-docs_1761094140318"
- custom_llm_provider: "vertex_ai/search_api"
- vertex_project: "test-vector-store-db"
- vertex_location: "global"
- - vector_store_name: "milvus-litellm-website-knowledgebase"
- litellm_params:
- vector_store_id: "can-be-anything"
- custom_llm_provider: "milvus"
- api_base: os.environ/MILVUS_API_BASE
- api_key: os.environ/MILVUS_API_KEY
\ No newline at end of file
diff --git a/litellm/proxy/batches_endpoints/endpoints.py b/litellm/proxy/batches_endpoints/endpoints.py
index 2fc0298d1c83..ac7082edb690 100644
--- a/litellm/proxy/batches_endpoints/endpoints.py
+++ b/litellm/proxy/batches_endpoints/endpoints.py
@@ -22,6 +22,9 @@
)
from litellm.proxy.openai_files_endpoints.common_utils import (
_is_base64_encoded_unified_file_id,
+ convert_b64_uid_to_unified_uid,
+ get_batch_id_from_unified_batch_id,
+ get_model_id_from_unified_batch_id,
get_models_from_unified_file_id,
)
from litellm.proxy.utils import handle_exception_on_proxy, is_known_model
@@ -506,6 +509,7 @@ async def cancel_batch(
from litellm.proxy.proxy_server import (
add_litellm_data_to_request,
general_settings,
+ llm_router,
proxy_config,
proxy_logging_obj,
version,
@@ -517,6 +521,7 @@ async def cancel_batch(
verbose_proxy_logger.debug(
"Request received by LiteLLM:\n{}".format(json.dumps(data, indent=4)),
)
+ unified_batch_id = _is_base64_encoded_unified_file_id(batch_id)
# Include original request and headers in the data
data = await add_litellm_data_to_request(
@@ -528,14 +533,36 @@ async def cancel_batch(
proxy_config=proxy_config,
)
- custom_llm_provider = (
- provider or data.pop("custom_llm_provider", None) or "openai"
- )
- _cancel_batch_data = CancelBatchRequest(batch_id=batch_id, **data)
- response = await litellm.acancel_batch(
- custom_llm_provider=custom_llm_provider, # type: ignore
- **_cancel_batch_data,
- )
+ if unified_batch_id:
+ if llm_router is None:
+ raise HTTPException(
+ status_code=500,
+ detail={
+ "error": "LLM Router not initialized. Ensure models added to proxy."
+ },
+ )
+
+ model = (
+ get_model_id_from_unified_batch_id(unified_batch_id)
+ if unified_batch_id
+ else None
+ )
+
+ model_batch_id = get_batch_id_from_unified_batch_id(unified_batch_id)
+
+ data["batch_id"] = model_batch_id
+
+ response = await llm_router.acancel_batch(model=model, **data) # type: ignore
+ else:
+
+ custom_llm_provider = (
+ provider or data.pop("custom_llm_provider", None) or "openai"
+ )
+ _cancel_batch_data = CancelBatchRequest(batch_id=batch_id, **data)
+ response = await litellm.acancel_batch(
+ custom_llm_provider=custom_llm_provider, # type: ignore
+ **_cancel_batch_data,
+ )
### ALERTING ###
asyncio.create_task(
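Condensed view of the new branch in the proxy's `cancel_batch` endpoint — a paraphrase of the hunk above rather than additional behavior; the wrapper name `_cancel` is hypothetical:

```python
from litellm.proxy.openai_files_endpoints.common_utils import (
    _is_base64_encoded_unified_file_id,
    get_batch_id_from_unified_batch_id,
    get_model_id_from_unified_batch_id,
)


async def _cancel(batch_id: str, llm_router, data: dict):
    unified_batch_id = _is_base64_encoded_unified_file_id(batch_id)
    if unified_batch_id:
        # LiteLLM-managed batch: recover the deployment id and the provider's
        # batch id from the unified id, then route through the Router.
        model = get_model_id_from_unified_batch_id(unified_batch_id)
        data["batch_id"] = get_batch_id_from_unified_batch_id(unified_batch_id)
        return await llm_router.acancel_batch(model=model, **data)
    ...  # unmanaged batch: provider-based litellm.acancel_batch(...) path as before
```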
diff --git a/litellm/proxy/openai_files_endpoints/files_endpoints.py b/litellm/proxy/openai_files_endpoints/files_endpoints.py
index 043b0c886e98..b0be54761364 100644
--- a/litellm/proxy/openai_files_endpoints/files_endpoints.py
+++ b/litellm/proxy/openai_files_endpoints/files_endpoints.py
@@ -31,8 +31,8 @@
from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
from litellm.proxy.common_utils.openai_endpoint_utils import (
get_custom_llm_provider_from_request_body,
- get_custom_llm_provider_from_request_query,
get_custom_llm_provider_from_request_headers,
+ get_custom_llm_provider_from_request_query,
)
from litellm.proxy.utils import ProxyLogging, is_known_model
from litellm.router import Router
@@ -788,6 +788,7 @@ async def delete_file(
param="None",
code=500,
)
+
response = await managed_files_obj.afile_delete(
file_id=file_id,
litellm_parent_otel_span=user_api_key_dict.parent_otel_span,
@@ -828,12 +829,11 @@ async def delete_file(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
- verbose_proxy_logger.error(
- "litellm.proxy.proxy_server.retrieve_file(): Exception occured - {}".format(
+ verbose_proxy_logger.exception(
+            "litellm.proxy.proxy_server.delete_file(): Exception occurred - {}".format(
str(e)
)
)
- verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
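Note on swapping `verbose_proxy_logger.error(...)` plus `debug(traceback.format_exc())` for a single `verbose_proxy_logger.exception(...)`: `logger.exception()` logs at ERROR level and appends the active traceback in one call, which is what makes the separate debug line redundant. Standard-library illustration:

```python
import logging

logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("demo")

try:
    1 / 0
except ZeroDivisionError as e:
    # Emits the message at ERROR level *and* the full traceback in one call.
    log.exception("delete_file(): Exception occurred - %s", str(e))
```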
diff --git a/litellm/router.py b/litellm/router.py
index 1489de864886..21eb894c0ccd 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -153,11 +153,7 @@
)
from litellm.types.utils import ModelInfo
from litellm.types.utils import ModelInfo as ModelMapInfo
-from litellm.types.utils import (
- ModelResponseStream,
- StandardLoggingPayload,
- Usage,
-)
+from litellm.types.utils import ModelResponseStream, StandardLoggingPayload, Usage
from litellm.utils import (
CustomStreamWrapper,
EmbeddingResponse,
@@ -357,6 +353,7 @@ def __init__( # noqa: PLR0915
self.enable_pre_call_checks = enable_pre_call_checks
self.enable_tag_filtering = enable_tag_filtering
from litellm._service_logger import ServiceLogging
+
self.service_logger_obj: ServiceLogging = ServiceLogging()
litellm.suppress_debug_info = True # prevents 'Give Feedback/Get help' message from being emitted on Router - Relevant Issue: https://github.com/BerriAI/litellm/issues/5942
if self.set_verbose is True:
@@ -375,9 +372,9 @@ def __init__( # noqa: PLR0915
) # names of models under litellm_params. ex. azure/chatgpt-v-2
self.deployment_latency_map = {}
### CACHING ###
- cache_type: Literal[
- "local", "redis", "redis-semantic", "s3", "disk"
- ] = "local" # default to an in-memory cache
+ cache_type: Literal["local", "redis", "redis-semantic", "s3", "disk"] = (
+ "local" # default to an in-memory cache
+ )
redis_cache = None
cache_config: Dict[str, Any] = {}
@@ -419,9 +416,9 @@ def __init__( # noqa: PLR0915
self.default_max_parallel_requests = default_max_parallel_requests
self.provider_default_deployment_ids: List[str] = []
self.pattern_router = PatternMatchRouter()
- self.team_pattern_routers: Dict[
- str, PatternMatchRouter
- ] = {} # {"TEAM_ID": PatternMatchRouter}
+ self.team_pattern_routers: Dict[str, PatternMatchRouter] = (
+ {}
+ ) # {"TEAM_ID": PatternMatchRouter}
self.auto_routers: Dict[str, "AutoRouter"] = {}
# Initialize model_group_alias early since it's used in set_model_list
@@ -602,9 +599,9 @@ def __init__( # noqa: PLR0915
)
)
- self.model_group_retry_policy: Optional[
- Dict[str, RetryPolicy]
- ] = model_group_retry_policy
+ self.model_group_retry_policy: Optional[Dict[str, RetryPolicy]] = (
+ model_group_retry_policy
+ )
self.allowed_fails_policy: Optional[AllowedFailsPolicy] = None
if allowed_fails_policy is not None:
@@ -708,9 +705,7 @@ def routing_strategy_init(
routing_strategy == RoutingStrategy.LEAST_BUSY.value
or routing_strategy == RoutingStrategy.LEAST_BUSY
):
- self.leastbusy_logger = LeastBusyLoggingHandler(
- router_cache=self.cache
- )
+ self.leastbusy_logger = LeastBusyLoggingHandler(router_cache=self.cache)
## add callback
if isinstance(litellm.input_callback, list):
litellm.input_callback.append(self.leastbusy_logger) # type: ignore
@@ -774,34 +769,84 @@ def initialize_assistants_endpoint(self):
def _initialize_core_endpoints(self):
"""Helper to initialize core router endpoints."""
- self.amoderation = self.factory_function(litellm.amoderation, call_type="moderation")
- self.aanthropic_messages = self.factory_function(litellm.anthropic_messages, call_type="anthropic_messages")
- self.agenerate_content = self.factory_function(litellm.agenerate_content, call_type="agenerate_content")
- self.aadapter_generate_content = self.factory_function(litellm.aadapter_generate_content, call_type="aadapter_generate_content")
- self.aresponses = self.factory_function(litellm.aresponses, call_type="aresponses")
- self.afile_delete = self.factory_function(litellm.afile_delete, call_type="afile_delete")
- self.afile_content = self.factory_function(litellm.afile_content, call_type="afile_content")
+ self.amoderation = self.factory_function(
+ litellm.amoderation, call_type="moderation"
+ )
+ self.aanthropic_messages = self.factory_function(
+ litellm.anthropic_messages, call_type="anthropic_messages"
+ )
+ self.agenerate_content = self.factory_function(
+ litellm.agenerate_content, call_type="agenerate_content"
+ )
+ self.aadapter_generate_content = self.factory_function(
+ litellm.aadapter_generate_content, call_type="aadapter_generate_content"
+ )
+ self.aresponses = self.factory_function(
+ litellm.aresponses, call_type="aresponses"
+ )
+ self.afile_delete = self.factory_function(
+ litellm.afile_delete, call_type="afile_delete"
+ )
+ self.afile_content = self.factory_function(
+ litellm.afile_content, call_type="afile_content"
+ )
self.responses = self.factory_function(litellm.responses, call_type="responses")
- self.aget_responses = self.factory_function(litellm.aget_responses, call_type="aget_responses")
- self.acancel_responses = self.factory_function(litellm.acancel_responses, call_type="acancel_responses")
- self.adelete_responses = self.factory_function(litellm.adelete_responses, call_type="adelete_responses")
- self.alist_input_items = self.factory_function(litellm.alist_input_items, call_type="alist_input_items")
- self._arealtime = self.factory_function(litellm._arealtime, call_type="_arealtime")
- self.acreate_fine_tuning_job = self.factory_function(litellm.acreate_fine_tuning_job, call_type="acreate_fine_tuning_job")
- self.acancel_fine_tuning_job = self.factory_function(litellm.acancel_fine_tuning_job, call_type="acancel_fine_tuning_job")
- self.alist_fine_tuning_jobs = self.factory_function(litellm.alist_fine_tuning_jobs, call_type="alist_fine_tuning_jobs")
- self.aretrieve_fine_tuning_job = self.factory_function(litellm.aretrieve_fine_tuning_job, call_type="aretrieve_fine_tuning_job")
- self.afile_list = self.factory_function(litellm.afile_list, call_type="alist_files")
- self.aimage_edit = self.factory_function(litellm.aimage_edit, call_type="aimage_edit")
- self.allm_passthrough_route = self.factory_function(litellm.allm_passthrough_route, call_type="allm_passthrough_route")
+ self.aget_responses = self.factory_function(
+ litellm.aget_responses, call_type="aget_responses"
+ )
+ self.acancel_responses = self.factory_function(
+ litellm.acancel_responses, call_type="acancel_responses"
+ )
+ self.adelete_responses = self.factory_function(
+ litellm.adelete_responses, call_type="adelete_responses"
+ )
+ self.alist_input_items = self.factory_function(
+ litellm.alist_input_items, call_type="alist_input_items"
+ )
+ self._arealtime = self.factory_function(
+ litellm._arealtime, call_type="_arealtime"
+ )
+ self.acreate_fine_tuning_job = self.factory_function(
+ litellm.acreate_fine_tuning_job, call_type="acreate_fine_tuning_job"
+ )
+ self.acancel_fine_tuning_job = self.factory_function(
+ litellm.acancel_fine_tuning_job, call_type="acancel_fine_tuning_job"
+ )
+ self.alist_fine_tuning_jobs = self.factory_function(
+ litellm.alist_fine_tuning_jobs, call_type="alist_fine_tuning_jobs"
+ )
+ self.aretrieve_fine_tuning_job = self.factory_function(
+ litellm.aretrieve_fine_tuning_job, call_type="aretrieve_fine_tuning_job"
+ )
+ self.afile_list = self.factory_function(
+ litellm.afile_list, call_type="alist_files"
+ )
+ self.aimage_edit = self.factory_function(
+ litellm.aimage_edit, call_type="aimage_edit"
+ )
+ self.allm_passthrough_route = self.factory_function(
+ litellm.allm_passthrough_route, call_type="allm_passthrough_route"
+ )
+ self.acancel_batch = self.factory_function(
+ litellm.acancel_batch, call_type="acancel_batch"
+ )
def _initialize_specialized_endpoints(self):
"""Helper to initialize specialized router endpoints (vector store, OCR, search, video, container)."""
from litellm.vector_stores.main import acreate, asearch, create, search
- self.avector_store_search = self.factory_function(asearch, call_type="avector_store_search")
- self.avector_store_create = self.factory_function(acreate, call_type="avector_store_create")
- self.vector_store_search = self.factory_function(search, call_type="vector_store_search")
- self.vector_store_create = self.factory_function(create, call_type="vector_store_create")
+
+ self.avector_store_search = self.factory_function(
+ asearch, call_type="avector_store_search"
+ )
+ self.avector_store_create = self.factory_function(
+ acreate, call_type="avector_store_create"
+ )
+ self.vector_store_search = self.factory_function(
+ search, call_type="vector_store_search"
+ )
+ self.vector_store_create = self.factory_function(
+ create, call_type="vector_store_create"
+ )
from litellm.google_genai import (
agenerate_content,
@@ -809,16 +854,27 @@ def _initialize_specialized_endpoints(self):
generate_content,
generate_content_stream,
)
- self.agenerate_content = self.factory_function(agenerate_content, call_type="agenerate_content")
- self.generate_content = self.factory_function(generate_content, call_type="generate_content")
- self.agenerate_content_stream = self.factory_function(agenerate_content_stream, call_type="agenerate_content_stream")
- self.generate_content_stream = self.factory_function(generate_content_stream, call_type="generate_content_stream")
+
+ self.agenerate_content = self.factory_function(
+ agenerate_content, call_type="agenerate_content"
+ )
+ self.generate_content = self.factory_function(
+ generate_content, call_type="generate_content"
+ )
+ self.agenerate_content_stream = self.factory_function(
+ agenerate_content_stream, call_type="agenerate_content_stream"
+ )
+ self.generate_content_stream = self.factory_function(
+ generate_content_stream, call_type="generate_content_stream"
+ )
from litellm.ocr import aocr, ocr
+
self.aocr = self.factory_function(aocr, call_type="aocr")
self.ocr = self.factory_function(ocr, call_type="ocr")
from litellm.search import asearch, search
+
self.asearch = self.factory_function(asearch, call_type="asearch")
self.search = self.factory_function(search, call_type="search")
@@ -834,15 +890,30 @@ def _initialize_specialized_endpoints(self):
video_remix,
video_status,
)
- self.avideo_generation = self.factory_function(avideo_generation, call_type="avideo_generation")
- self.video_generation = self.factory_function(video_generation, call_type="video_generation")
+
+ self.avideo_generation = self.factory_function(
+ avideo_generation, call_type="avideo_generation"
+ )
+ self.video_generation = self.factory_function(
+ video_generation, call_type="video_generation"
+ )
self.avideo_list = self.factory_function(avideo_list, call_type="avideo_list")
self.video_list = self.factory_function(video_list, call_type="video_list")
- self.avideo_status = self.factory_function(avideo_status, call_type="avideo_status")
- self.video_status = self.factory_function(video_status, call_type="video_status")
- self.avideo_content = self.factory_function(avideo_content, call_type="avideo_content")
- self.video_content = self.factory_function(video_content, call_type="video_content")
- self.avideo_remix = self.factory_function(avideo_remix, call_type="avideo_remix")
+ self.avideo_status = self.factory_function(
+ avideo_status, call_type="avideo_status"
+ )
+ self.video_status = self.factory_function(
+ video_status, call_type="video_status"
+ )
+ self.avideo_content = self.factory_function(
+ avideo_content, call_type="avideo_content"
+ )
+ self.video_content = self.factory_function(
+ video_content, call_type="video_content"
+ )
+ self.avideo_remix = self.factory_function(
+ avideo_remix, call_type="avideo_remix"
+ )
self.video_remix = self.factory_function(video_remix, call_type="video_remix")
from litellm.containers import (
@@ -855,14 +926,31 @@ def _initialize_specialized_endpoints(self):
list_containers,
retrieve_container,
)
- self.acreate_container = self.factory_function(acreate_container, call_type="acreate_container")
- self.create_container = self.factory_function(create_container, call_type="create_container")
- self.alist_containers = self.factory_function(alist_containers, call_type="alist_containers")
- self.list_containers = self.factory_function(list_containers, call_type="list_containers")
- self.aretrieve_container = self.factory_function(aretrieve_container, call_type="aretrieve_container")
- self.retrieve_container = self.factory_function(retrieve_container, call_type="retrieve_container")
- self.adelete_container = self.factory_function(adelete_container, call_type="adelete_container")
- self.delete_container = self.factory_function(delete_container, call_type="delete_container")
+
+ self.acreate_container = self.factory_function(
+ acreate_container, call_type="acreate_container"
+ )
+ self.create_container = self.factory_function(
+ create_container, call_type="create_container"
+ )
+ self.alist_containers = self.factory_function(
+ alist_containers, call_type="alist_containers"
+ )
+ self.list_containers = self.factory_function(
+ list_containers, call_type="list_containers"
+ )
+ self.aretrieve_container = self.factory_function(
+ aretrieve_container, call_type="aretrieve_container"
+ )
+ self.retrieve_container = self.factory_function(
+ retrieve_container, call_type="retrieve_container"
+ )
+ self.adelete_container = self.factory_function(
+ adelete_container, call_type="adelete_container"
+ )
+ self.delete_container = self.factory_function(
+ delete_container, call_type="delete_container"
+ )
def initialize_router_endpoints(self):
self._initialize_core_endpoints()
@@ -1226,7 +1314,10 @@ async def stream_with_fallbacks():
async def _acompletion(
self, model: str, messages: List[Dict[str, str]], **kwargs
- ) -> Union[ModelResponse, CustomStreamWrapper,]:
+ ) -> Union[
+ ModelResponse,
+ CustomStreamWrapper,
+ ]:
"""
- Get an available deployment
- call it with a semaphore over the call
@@ -2694,21 +2785,19 @@ async def _aadapter_completion(self, adapter_id: str, model: str, **kwargs):
self.fail_calls[model] += 1
raise e
- async def _asearch_with_fallbacks(
- self, original_function: Callable, **kwargs
- ):
+ async def _asearch_with_fallbacks(self, original_function: Callable, **kwargs):
"""
Helper function to make a search API call through the router with load balancing and fallbacks.
Reuses the router's retry/fallback infrastructure.
"""
from litellm.router_utils.search_api_router import SearchAPIRouter
-
+
return await SearchAPIRouter.async_search_with_fallbacks(
router_instance=self,
original_function=original_function,
**kwargs,
)
-
+
async def _asearch_with_fallbacks_helper(
self, model: str, original_generic_function: Callable, **kwargs
):
@@ -2717,7 +2806,7 @@ async def _asearch_with_fallbacks_helper(
Called by async_function_with_fallbacks for each retry attempt.
"""
from litellm.router_utils.search_api_router import SearchAPIRouter
-
+
return await SearchAPIRouter.async_search_with_fallbacks_helper(
router_instance=self,
model=model,
@@ -2755,11 +2844,9 @@ async def _ageneric_api_call_with_fallbacks(
)
)
raise e
-
+
def _add_deployment_model_to_endpoint_for_llm_passthrough_route(
- self, kwargs: Dict[str, Any],
- model: str,
- model_name: str
+ self, kwargs: Dict[str, Any], model: str, model_name: str
) -> Dict[str, Any]:
"""
Add the deployment model to the endpoint for LLM passthrough route.
@@ -2771,7 +2858,7 @@ def _add_deployment_model_to_endpoint_for_llm_passthrough_route(
# For provider-specific endpoints, strip the provider prefix from model_name
# e.g., "bedrock/us.anthropic.claude-3-5-sonnet-20240620-v1:0" -> "us.anthropic.claude-3-5-sonnet-20240620-v1:0"
from litellm import get_llm_provider
-
+
try:
# get_llm_provider returns (model_without_prefix, provider, api_key, api_base)
stripped_model_name, _, _, _ = get_llm_provider(
@@ -2783,8 +2870,10 @@ def _add_deployment_model_to_endpoint_for_llm_passthrough_route(
except Exception:
# If get_llm_provider fails, fall back to using model_name as-is
replacement_model_name = model_name
-
- kwargs["endpoint"] = kwargs["endpoint"].replace(model, replacement_model_name)
+
+ kwargs["endpoint"] = kwargs["endpoint"].replace(
+ model, replacement_model_name
+ )
return kwargs
async def _ageneric_api_call_with_fallbacks_helper(
@@ -2818,7 +2907,9 @@ async def _ageneric_api_call_with_fallbacks_helper(
model_name = data["model"]
self.total_calls[model_name] += 1
- self._add_deployment_model_to_endpoint_for_llm_passthrough_route(kwargs=kwargs, model=model, model_name=model_name)
+ self._add_deployment_model_to_endpoint_for_llm_passthrough_route(
+ kwargs=kwargs, model=model, model_name=model_name
+ )
### get custom
response = original_generic_function(
**{
@@ -3247,9 +3338,9 @@ async def create_file_for_deployment(deployment: dict) -> OpenAIFileObject:
healthy_deployments=healthy_deployments, responses=responses
)
returned_response = cast(OpenAIFileObject, responses[0])
- returned_response._hidden_params[
- "model_file_id_mapping"
- ] = model_file_id_mapping
+ returned_response._hidden_params["model_file_id_mapping"] = (
+ model_file_id_mapping
+ )
return returned_response
except Exception as e:
verbose_router_logger.exception(
@@ -3582,6 +3673,7 @@ def factory_function(
"afile_delete",
"afile_content",
"_arealtime",
+ "acancel_batch",
"acreate_fine_tuning_job",
"acancel_fine_tuning_job",
"alist_fine_tuning_jobs",
@@ -3620,7 +3712,7 @@ def factory_function(
"aretrieve_container",
"retrieve_container",
"adelete_container",
- "delete_container"
+ "delete_container",
] = "assistants",
):
"""
@@ -3706,6 +3798,7 @@ async def async_wrapper(
"alist_containers",
"aretrieve_container",
"adelete_container",
+ "acancel_batch",
):
return await self._ageneric_api_call_with_fallbacks(
original_function=original_function,
@@ -3862,11 +3955,11 @@ async def async_function_with_fallbacks_common_utils( # noqa: PLR0915
if isinstance(e, litellm.ContextWindowExceededError):
if context_window_fallbacks is not None:
- context_window_fallback_model_group: Optional[
- List[str]
- ] = self._get_fallback_model_group_from_fallbacks(
- fallbacks=context_window_fallbacks,
- model_group=model_group,
+ context_window_fallback_model_group: Optional[List[str]] = (
+ self._get_fallback_model_group_from_fallbacks(
+ fallbacks=context_window_fallbacks,
+ model_group=model_group,
+ )
)
if context_window_fallback_model_group is None:
raise original_exception
@@ -3898,11 +3991,11 @@ async def async_function_with_fallbacks_common_utils( # noqa: PLR0915
e.message += "\n{}".format(error_message)
elif isinstance(e, litellm.ContentPolicyViolationError):
if content_policy_fallbacks is not None:
- content_policy_fallback_model_group: Optional[
- List[str]
- ] = self._get_fallback_model_group_from_fallbacks(
- fallbacks=content_policy_fallbacks,
- model_group=model_group,
+ content_policy_fallback_model_group: Optional[List[str]] = (
+ self._get_fallback_model_group_from_fallbacks(
+ fallbacks=content_policy_fallbacks,
+ model_group=model_group,
+ )
)
if content_policy_fallback_model_group is None:
raise original_exception
@@ -4620,7 +4713,7 @@ def deployment_callback_on_failure(
try:
exception = kwargs.get("exception", None)
exception_status = getattr(exception, "status_code", "")
-
+
# Cache litellm_params to avoid repeated dict lookups
litellm_params = kwargs.get("litellm_params", {})
_model_info = litellm_params.get("model_info", {})
@@ -5144,26 +5237,26 @@ def init_auto_router_deployment(self, deployment: Deployment):
"""
from litellm.router_strategy.auto_router.auto_router import AutoRouter
- auto_router_config_path: Optional[
- str
- ] = deployment.litellm_params.auto_router_config_path
+ auto_router_config_path: Optional[str] = (
+ deployment.litellm_params.auto_router_config_path
+ )
auto_router_config: Optional[str] = deployment.litellm_params.auto_router_config
if auto_router_config_path is None and auto_router_config is None:
raise ValueError(
"auto_router_config_path or auto_router_config is required for auto-router deployments. Please set it in the litellm_params"
)
- default_model: Optional[
- str
- ] = deployment.litellm_params.auto_router_default_model
+ default_model: Optional[str] = (
+ deployment.litellm_params.auto_router_default_model
+ )
if default_model is None:
raise ValueError(
"auto_router_default_model is required for auto-router deployments. Please set it in the litellm_params"
)
- embedding_model: Optional[
- str
- ] = deployment.litellm_params.auto_router_embedding_model
+ embedding_model: Optional[str] = (
+ deployment.litellm_params.auto_router_embedding_model
+ )
if embedding_model is None:
raise ValueError(
"auto_router_embedding_model is required for auto-router deployments. Please set it in the litellm_params"
@@ -5269,7 +5362,7 @@ def set_model_list(self, model_list: list):
f"\nInitialized Model List {self.get_model_names()}"
)
self.model_names = {m["model_name"] for m in model_list}
-
+
# Note: model_name_to_deployment_indices is already built incrementally
# by _create_deployment -> _add_model_to_list_and_index_map
@@ -5494,13 +5587,13 @@ def _update_deployment_indices_after_removal(
# Remove the deleted model from index
if model_id in self.model_id_to_deployment_index_map:
del self.model_id_to_deployment_index_map[model_id]
-
+
# Update model_name_to_deployment_indices
for model_name, indices in list(self.model_name_to_deployment_indices.items()):
# Remove the deleted index
if removal_idx in indices:
indices.remove(removal_idx)
-
+
# Decrement all indices greater than removal_idx
updated_indices = []
for idx in indices:
@@ -5508,7 +5601,7 @@ def _update_deployment_indices_after_removal(
updated_indices.append(idx - 1)
else:
updated_indices.append(idx)
-
+
# Update or remove the entry
if len(updated_indices) > 0:
self.model_name_to_deployment_indices[model_name] = updated_indices
@@ -5527,13 +5620,13 @@ def _add_model_to_list_and_index_map(
"""
idx = len(self.model_list)
self.model_list.append(model)
-
+
# Update model_id index for O(1) lookup
if model_id is not None:
self.model_id_to_deployment_index_map[model_id] = idx
elif model.get("model_info", {}).get("id") is not None:
self.model_id_to_deployment_index_map[model["model_info"]["id"]] = idx
-
+
# Update model_name index for O(1) lookup
model_name = model.get("model_name")
if model_name:
@@ -5653,7 +5746,7 @@ def get_deployment_by_model_group_name(
Returns -> Deployment or None
Raise Exception -> if model found in invalid format
-
+
Optimized with O(1) index lookup instead of O(n) linear scan.
"""
# O(1) lookup in model_name index
@@ -5771,7 +5864,7 @@ def get_model_info(self, id: str) -> Optional[dict]:
Returns
- dict: the model in list with 'model_name', 'litellm_params', Optional['model_info']
- None: could not find deployment in list
-
+
Optimized with O(1) index lookup instead of O(n) linear scan.
"""
# O(1) lookup via model_id_to_deployment_index_map
@@ -5886,11 +5979,11 @@ def _set_model_group_info( # noqa: PLR0915
configurable_clientside_auth_params = (
litellm_params.configurable_clientside_auth_params
)
-
+
# Cache nested dict access to avoid repeated temporary dict allocations
model_litellm_params = model.get("litellm_params", {})
model_info_dict = model.get("model_info", {})
-
+
# get model tpm
_deployment_tpm: Optional[int] = None
if _deployment_tpm is None:
@@ -6266,12 +6359,12 @@ async def set_response_headers(
def _build_model_name_index(self, model_list: list) -> None:
"""
Build model_name -> deployment indices mapping for O(1) lookups.
-
+
This index allows us to find all deployments for a given model_name in O(1) time
instead of O(n) linear scan through the entire model_list.
"""
self.model_name_to_deployment_indices.clear()
-
+
for idx, model in enumerate(model_list):
model_name = model.get("model_name")
if model_name:
@@ -6311,12 +6404,12 @@ def get_model_ids(
if 'model_name' is none, returns all.
Returns list of model id's.
-
+
Optimized with O(1) or O(k) index lookup when model_name provided,
instead of O(n) linear scan.
- """
+ """
ids = []
-
+
if model_name is not None:
# O(1) lookup in model_name index, then O(k) iteration where k = deployments for this model_name
if model_name in self.model_name_to_deployment_indices:
@@ -6337,7 +6430,7 @@ def get_model_ids(
if exclude_team_models and model["model_info"].get("team_id"):
continue
ids.append(model_id)
-
+
return ids
def has_model_id(self, candidate_id: str) -> bool:
@@ -6399,15 +6492,15 @@ def _get_all_deployments(
Used for accurate 'get_model_list'.
if team_id specified, only return team-specific models
-
+
Optimized with O(1) index lookup instead of O(n) linear scan.
"""
returned_models: List[DeploymentTypedDict] = []
-
+
# O(1) lookup in model_name index
if model_name in self.model_name_to_deployment_indices:
indices = self.model_name_to_deployment_indices[model_name]
-
+
# O(k) where k = deployments for this model_name (typically 1-10)
for idx in indices:
model = self.model_list[idx]
@@ -6556,9 +6649,7 @@ def get_model_list(
potential_team_only_wildcard_models = (
self.team_pattern_routers[team_id].route(model_name) or []
)
- potential_wildcard_models.extend(
- potential_team_only_wildcard_models
- )
+ potential_wildcard_models.extend(potential_team_only_wildcard_models)
if model_name is not None and potential_wildcard_models is not None:
for m in potential_wildcard_models:
@@ -6821,7 +6912,7 @@ def _pre_call_checks( # noqa: PLR0915
# Cache nested dict access to avoid repeated temporary dict allocations
_litellm_params = deployment.get("litellm_params", {})
_model_info = deployment.get("model_info", {})
-
+
# see if we have the info for this model
try:
base_model = _model_info.get("base_model", None)
@@ -6949,7 +7040,9 @@ def _pre_call_checks( # noqa: PLR0915
if len(invalid_model_indices) > 0:
# Single-pass filter using set for O(1) lookups (avoids O(n^2) from repeated pops)
_returned_deployments = [
- d for i, d in enumerate(_returned_deployments) if i not in invalid_model_indices
+ d
+ for i, d in enumerate(_returned_deployments)
+ if i not in invalid_model_indices
]
## ORDER FILTERING ## -> if user set 'order' in deployments, return deployments with lowest order (e.g. order=1 > order=2)
@@ -7505,7 +7598,8 @@ def _filter_cooldown_deployments(
# Convert to set for O(1) lookup and use list comprehension for O(n) filtering
cooldown_set = set(cooldown_deployments)
return [
- deployment for deployment in healthy_deployments
+ deployment
+ for deployment in healthy_deployments
if deployment["model_info"]["id"] not in cooldown_set
]
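With `acancel_batch` registered through `factory_function` and added to the generic-call allowlist, batch cancellation now flows through the Router's load-balancing/fallback path like the other generic endpoints. A hedged usage sketch — deployment settings and batch id are placeholders:

```python
import asyncio

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "batch-model",  # alias used by callers
            "litellm_params": {
                "model": "azure/gpt-4o-batch",                   # placeholder deployment
                "api_key": "sk-...",                             # placeholder credentials
                "api_base": "https://example.openai.azure.com",  # placeholder endpoint
            },
        }
    ]
)


async def main() -> None:
    resp = await router.acancel_batch(model="batch-model", batch_id="batch_abc123")
    print(resp.status)


asyncio.run(main())
```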