6 changes: 6 additions & 0 deletions docs/my-website/docs/completion/input.md
@@ -164,6 +164,12 @@ def completion(

- `max_tokens`: *integer (optional)* - The maximum number of tokens to generate in the chat completion.

:::info **max_tokens vs max_output_tokens**

`max_tokens` and `max_output_tokens` are equivalent: `max_tokens` is the legacy parameter, `max_output_tokens` is the current standard. LiteLLM uses `max_output_tokens` internally. A usage sketch follows this file's diff.

:::

- `presence_penalty`: *number or null (optional)* - Penalizes new tokens based on whether they appear in the text so far.

- `response_format`: *object (optional)* - An object specifying the format that the model must output.
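To make the `max_tokens` semantics concrete, here is a minimal sketch of passing the parameter to `litellm.completion`. It is an illustration, not part of this diff: it assumes `litellm` is installed and an OpenAI-compatible API key is available, and `gpt-3.5-turbo` is only an example model name.

```python
import os
from litellm import completion

# Placeholder key; assumes a real OpenAI-compatible key in practice.
os.environ["OPENAI_API_KEY"] = "sk-..."

response = completion(
    model="gpt-3.5-turbo",  # example model only; any supported provider works
    messages=[{"role": "user", "content": "Say hello in five words."}],
    max_tokens=10,  # legacy name; caps the generated completion length
)
print(response.choices[0].message.content)
```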
6 changes: 6 additions & 0 deletions docs/my-website/docs/completion/token_usage.md
@@ -31,6 +31,12 @@ LiteLLM also exposes some helper functions:

- `get_max_tokens`: This returns the maximum number of tokens allowed for the given model. [**Jump to code**](#7-get_max_tokens)

:::info **Note**

`get_max_tokens()` returns `max_output_tokens` from the model config; `max_tokens` is the legacy field, `max_output_tokens` is the current standard (see the sketch after this list).

:::

- `model_cost`: This returns a dictionary for all models, with their `max_tokens`, `input_cost_per_token` and `output_cost_per_token`. It uses the `api.litellm.ai` call shown below. [**Jump to code**](#8-model_cost)

- `register_model`: This registers new / overrides existing models (and their pricing details) in the model cost dictionary. [**Jump to code**](#9-register_model)
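As a rough illustration of the helpers above — a minimal sketch, assuming `litellm` is installed; `gpt-4` is just an example key, and the fields read from `model_cost` follow the `sample_spec` shown in the JSON diff below:

```python
from litellm import get_max_tokens, model_cost

# Maximum output tokens for a model (per the note above, this is
# max_output_tokens from the model config).
print(get_max_tokens("gpt-4"))

# model_cost maps model names to entries carrying max_tokens and
# per-token input/output costs.
gpt4 = model_cost["gpt-4"]
print(gpt4["max_tokens"], gpt4["input_cost_per_token"], gpt4["output_cost_per_token"])
```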
80 changes: 40 additions & 40 deletions model_prices_and_context_window.json
@@ -1,4 +1,44 @@
{
"sample_spec": {
"code_interpreter_cost_per_session": 0.0,
"computer_use_input_cost_per_1k_tokens": 0.0,
"computer_use_output_cost_per_1k_tokens": 0.0,
"deprecation_date": "date when the model becomes deprecated in the format YYYY-MM-DD",
"file_search_cost_per_1k_calls": 0.0,
"file_search_cost_per_gb_per_day": 0.0,
"input_cost_per_audio_token": 0.0,
"input_cost_per_token": 0.0,
"litellm_provider": "one of https://docs.litellm.ai/docs/providers",
"max_input_tokens": "max input tokens, if the provider specifies it. if not default to max_tokens",
"max_output_tokens": "max output tokens, if the provider specifies it. if not default to max_tokens",
"max_tokens": "LEGACY parameter. set to max_output_tokens if provider specifies it. IF not set to max_input_tokens, if provider specifies it.",
"mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, image_generation, moderation, rerank",
"output_cost_per_reasoning_token": 0.0,
"output_cost_per_token": 0.0,
"search_context_cost_per_query": {
"search_context_size_high": 0.0,
"search_context_size_low": 0.0,
"search_context_size_medium": 0.0
},
"supported_regions": [
"global",
"us-west-2",
"eu-west-1",
"ap-southeast-1",
"ap-northeast-1"
],
"supports_audio_input": true,
"supports_audio_output": true,
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_vision": true,
"supports_web_search": true,
"vector_store_cost_per_gb_per_day": 0.0
},
"1024-x-1024/50-steps/bedrock/amazon.nova-canvas-v1:0": {
"litellm_provider": "bedrock",
"max_input_tokens": 2600,
@@ -18970,46 +19010,6 @@
"supports_reasoning": true,
"source": "https://cloud.sambanova.ai/plans/pricing"
},
"sample_spec": {
"code_interpreter_cost_per_session": 0.0,
"computer_use_input_cost_per_1k_tokens": 0.0,
"computer_use_output_cost_per_1k_tokens": 0.0,
"deprecation_date": "date when the model becomes deprecated in the format YYYY-MM-DD",
"file_search_cost_per_1k_calls": 0.0,
"file_search_cost_per_gb_per_day": 0.0,
"input_cost_per_audio_token": 0.0,
"input_cost_per_token": 0.0,
"litellm_provider": "one of https://docs.litellm.ai/docs/providers",
"max_input_tokens": "max input tokens, if the provider specifies it. if not default to max_tokens",
"max_output_tokens": "max output tokens, if the provider specifies it. if not default to max_tokens",
"max_tokens": "LEGACY parameter. set to max_output_tokens if provider specifies it. IF not set to max_input_tokens, if provider specifies it.",
"mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, image_generation, moderation, rerank",
"output_cost_per_reasoning_token": 0.0,
"output_cost_per_token": 0.0,
"search_context_cost_per_query": {
"search_context_size_high": 0.0,
"search_context_size_low": 0.0,
"search_context_size_medium": 0.0
},
"supported_regions": [
"global",
"us-west-2",
"eu-west-1",
"ap-southeast-1",
"ap-northeast-1"
],
"supports_audio_input": true,
"supports_audio_output": true,
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_system_messages": true,
"supports_vision": true,
"supports_web_search": true,
"vector_store_cost_per_gb_per_day": 0.0
},
"snowflake/claude-3-5-sonnet": {
"litellm_provider": "snowflake",
"max_input_tokens": 18000,
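The `max_tokens` / `max_output_tokens` fallback that `sample_spec` describes can be summarized in code. This is a minimal sketch, not LiteLLM's actual resolution logic; it assumes a local copy of `model_prices_and_context_window.json`, and the model key is one that appears in the diff above.

```python
import json

def effective_max_output_tokens(entry: dict):
    """Resolve the output-token cap for one model entry.

    Per sample_spec: prefer max_output_tokens, falling back to the
    legacy max_tokens field when it is absent.
    """
    value = entry.get("max_output_tokens")
    return value if value is not None else entry.get("max_tokens")

# Assumes a local copy of the JSON file shown in this diff.
with open("model_prices_and_context_window.json") as f:
    prices = json.load(f)

# "snowflake/claude-3-5-sonnet" appears above; any model key works.
print(effective_max_output_tokens(prices["snowflake/claude-3-5-sonnet"]))
```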