diff --git a/litellm/litellm_core_utils/token_counter.py b/litellm/litellm_core_utils/token_counter.py
index fab2c1e76ee4..36f095cb1525 100644
--- a/litellm/litellm_core_utils/token_counter.py
+++ b/litellm/litellm_core_utils/token_counter.py
@@ -3,7 +3,17 @@
 import base64
 import io
 import struct
-from typing import Callable, List, Literal, Optional, Tuple, Union, cast
+from typing import (
+    Any,
+    Callable,
+    List,
+    Literal,
+    Optional,
+    Tuple,
+    Union,
+    cast,
+    get_type_hints,
+)
 
 import tiktoken
 
@@ -20,6 +30,10 @@
 )
 from litellm.litellm_core_utils.default_encoding import encoding as default_encoding
 from litellm.llms.custom_httpx.http_handler import _get_httpx_client
+from litellm.types.llms.anthropic import (
+    AnthropicMessagesToolResultParam,
+    AnthropicMessagesToolUseParam,
+)
 from litellm.types.llms.openai import (
     AllMessageValues,
     ChatCompletionNamedToolChoiceParam,
@@ -552,6 +566,133 @@ def _fix_model_name(model: str) -> str:
         return "gpt-3.5-turbo"
 
 
+def _count_image_tokens(
+    image_url: Any,
+    use_default_image_token_count: bool,
+) -> int:
+    """
+    Count tokens for an image_url content block.
+
+    Args:
+        image_url: The image URL data - can be a string URL or a dict with 'url' and 'detail'
+        use_default_image_token_count: Whether to use default image token counts
+
+    Returns:
+        int: Number of tokens for the image
+
+    Raises:
+        ValueError: If image_url is an invalid type or the detail value is invalid
+    """
+    if isinstance(image_url, dict):
+        detail = image_url.get("detail", "auto")
+        if detail not in ["low", "high", "auto"]:
+            raise ValueError(
+                f"Invalid detail value: {detail}. Expected 'low', 'high', or 'auto'."
+            )
+        url = image_url.get("url")
+        if not url:
+            raise ValueError("Missing required key 'url' in image_url dict.")
+        return calculate_img_tokens(
+            data=url,
+            mode=detail,  # type: ignore
+            use_default_image_token_count=use_default_image_token_count,
+        )
+
+    elif isinstance(image_url, str):
+        if not image_url.strip():
+            raise ValueError("Empty image_url string is not valid.")
+        return calculate_img_tokens(
+            data=image_url,
+            mode="auto",
+            use_default_image_token_count=use_default_image_token_count,
+        )
+
+    else:
+        raise ValueError(
+            f"Invalid image_url type: {type(image_url).__name__}. "
+            "Expected str or dict with 'url' field."
+        )
+
+
+def _validate_anthropic_content(content: dict) -> type:
+    """
+    Validate and determine which Anthropic TypedDict applies.
+
+    Returns the corresponding TypedDict class if recognized, otherwise raises ValueError.
+    """
+    content_type = content.get("type")
+    if not content_type:
+        raise ValueError("Anthropic content missing required field: 'type'")
+
+    mapping = {
+        "tool_use": AnthropicMessagesToolUseParam,
+        "tool_result": AnthropicMessagesToolResultParam,
+    }
+
+    expected_cls = mapping.get(content_type)
+    if expected_cls is None:
+        raise ValueError(f"Unknown Anthropic content type: '{content_type}'")
+
+    missing = [
+        k for k in getattr(expected_cls, "__required_keys__", set()) if k not in content
+    ]
+    if missing:
+        raise ValueError(
+            f"Missing required fields in {content_type} block: {', '.join(missing)}"
+        )
+
+    return expected_cls
+
+
+def _count_anthropic_content(
+    content: dict,
+    count_function: TokenCounterFunction,
+    use_default_image_token_count: bool,
+    default_token_count: Optional[int],
+) -> int:
+    """
+    Count tokens in Anthropic-specific content blocks (tool_use, tool_result).
+
+    Uses TypedDict definitions from litellm.types.llms.anthropic to determine
+    what fields to count and how to handle nested structures.
+
+    Dynamically infers which fields to count based on the TypedDict definition,
+    avoiding hardcoded field names.
+    """
+    typeddict_cls = _validate_anthropic_content(content)
+    type_hints = get_type_hints(typeddict_cls)
+    tokens = 0
+
+    # Fields to skip (metadata/identifiers that don't contribute to prompt tokens)
+    skip_fields = {"type", "id", "tool_use_id", "cache_control", "is_error"}
+
+    # Iterate over all fields defined in the TypedDict
+    for field_name, field_type in type_hints.items():
+        if field_name in skip_fields:
+            continue
+
+        field_value = content.get(field_name)
+        if field_value is None:
+            continue
+        try:
+            if isinstance(field_value, str):
+                tokens += count_function(field_value)
+            elif isinstance(field_value, list):
+                tokens += _count_content_list(
+                    count_function,
+                    field_value,  # type: ignore
+                    use_default_image_token_count,
+                    default_token_count,
+                )
+            elif isinstance(field_value, dict):
+                tokens += count_function(str(field_value))
+        except Exception as e:
+            if default_token_count is not None:
+                return default_token_count
+            raise ValueError(f"Error counting field '{field_name}': {e}")
+    return tokens
+
+
 def _count_content_list(
     count_function: TokenCounterFunction,
     content_list: OpenAIMessageContent,
@@ -559,50 +700,44 @@
     default_token_count: Optional[int],
 ) -> int:
     """
-    Get the number of tokens from a list of content.
+    Recursively count tokens from a list of content blocks.
     """
     try:
         num_tokens = 0
         for c in content_list:
             if isinstance(c, str):
                 num_tokens += count_function(c)
-            elif c["type"] == "text":
-                num_tokens += count_function(c["text"])
-            elif c["type"] == "image_url":
-                if isinstance(c["image_url"], dict):
-                    image_url_dict = c["image_url"]
-                    detail = image_url_dict.get("detail", "auto")
-                    if detail not in ["low", "high", "auto"]:
-                        raise ValueError(
-                            f"Invalid detail value: {detail}. Expected 'low', 'high', or 'auto'."
-                        )
-                    url = image_url_dict.get("url")
-                    num_tokens += calculate_img_tokens(
-                        data=url,
-                        mode=detail,  # type: ignore
-                        use_default_image_token_count=use_default_image_token_count,
+            elif isinstance(c, dict):
+                ctype = c.get("type")
+                if ctype == "text":
+                    num_tokens += count_function(c.get("text", ""))
+                elif ctype == "image_url":
+                    image_url = c.get("image_url")
+                    num_tokens += _count_image_tokens(
+                        image_url, use_default_image_token_count
                     )
-                elif isinstance(c["image_url"], str):
-                    image_url_str = c["image_url"]
-                    num_tokens += calculate_img_tokens(
-                        data=image_url_str,
-                        mode="auto",
-                        use_default_image_token_count=use_default_image_token_count,
+                elif ctype in ("tool_use", "tool_result"):
+                    num_tokens += _count_anthropic_content(
+                        c,
+                        count_function,
+                        use_default_image_token_count,
+                        default_token_count,
                     )
                 else:
-                    raise ValueError(
-                        f"Invalid image_url type: {type(c['image_url'])}. Expected str or dict."
-                    )
+                    raise ValueError(f"Invalid content type: {ctype}")
             else:
                 raise ValueError(
-                    f"Invalid content type: {type(c)}. Expected str or dict."
+                    f"Invalid content item type: {type(c).__name__}. "
+                    f"Expected str or dict with 'type' field. "
" + f"Value: {c!r}" ) return num_tokens except Exception as e: if default_token_count is not None: return default_token_count raise ValueError( - f"Error getting number of tokens from content list: {e}, default_token_count={default_token_count}" + f"Error getting number of tokens from content list: {e}, " + f"default_token_count={default_token_count}" ) diff --git a/tests/test_litellm/litellm_core_utils/test_token_counter.py b/tests/test_litellm/litellm_core_utils/test_token_counter.py index 5d17ea3dc3cf..8cd623267ba3 100644 --- a/tests/test_litellm/litellm_core_utils/test_token_counter.py +++ b/tests/test_litellm/litellm_core_utils/test_token_counter.py @@ -631,3 +631,269 @@ def test_bad_input_token_counter(model, messages): messages=messages, default_token_count=1000, ) + + +def test_token_counter_with_anthropic_tool_use(): + """ + Test that _count_anthropic_content() correctly handles tool_use blocks. + + Validates that: + - 'name' field is counted (string) + - 'input' field is counted (dict serialized to string) + - Metadata fields ('type', 'id') are skipped + """ + messages = [ + { + "role": "user", + "content": "What's the weather in San Francisco?" + }, + { + "role": "assistant", + "content": [ + { + "type": "text", + "text": "I'll check the weather for you." + }, + { + "type": "tool_use", + "id": "toolu_01234567890", # Should be skipped + "name": "get_weather", # Should be counted + "input": { # Should be counted (serialized) + "location": "San Francisco, CA", + "unit": "fahrenheit" + } + } + ] + } + ] + + tokens = token_counter(model="gpt-3.5-turbo", messages=messages) + assert tokens > 0, f"Expected positive token count, got {tokens}" + # Should count: user message + "I'll check" text + "get_weather" name + input dict + assert tokens > 15, f"Expected reasonable token count for message with tool_use, got {tokens}" + + +def test_token_counter_with_anthropic_tool_result(): + """ + Test that _count_anthropic_content() correctly handles tool_result blocks. + + Validates that: + - 'content' field (when string) is counted + - Metadata fields ('type', 'tool_use_id') are skipped + - Full conversation with tool_use → tool_result flow works + """ + messages = [ + { + "role": "user", + "content": "What's the weather in San Francisco?" + }, + { + "role": "assistant", + "content": [ + { + "type": "tool_use", + "id": "toolu_01234567890", + "name": "get_weather", + "input": { + "location": "San Francisco, CA" + } + } + ] + }, + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "toolu_01234567890", # Should be skipped + "content": "The weather in San Francisco is 65°F and sunny." # Should be counted + } + ] + } + ] + + tokens = token_counter(model="gpt-3.5-turbo", messages=messages) + assert tokens > 0, f"Expected positive token count, got {tokens}" + assert tokens > 25, f"Expected reasonable token count for conversation with tool_result, got {tokens}" + + +def test_token_counter_with_nested_tool_result(): + """ + Test that _count_anthropic_content() recursively handles nested content lists. + + Validates that: + - tool_result with 'content' as a list (not string) is handled + - Nested content blocks are recursively counted via _count_content_list() + - TypedDict inference correctly identifies list fields + """ + messages = [ + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "toolu_01234567890", + "content": [ # Nested list - should recursively count + { + "type": "text", + "text": "The weather in San Francisco is 65°F and sunny." 
+                        },
+                        {
+                            "type": "text",
+                            "text": "UV index is moderate."
+                        }
+                    ]
+                }
+            ]
+        }
+    ]
+
+    tokens = token_counter(model="gpt-3.5-turbo", messages=messages)
+    assert tokens > 0, f"Expected positive token count, got {tokens}"
+    # Should count both nested text blocks
+    assert tokens > 15, f"Expected reasonable token count for nested tool_result, got {tokens}"
+
+
+def test_token_counter_tool_use_and_result_combined():
+    """
+    Test dynamic field inference with multiple tool_use and tool_result blocks.
+
+    Validates that:
+    - Multiple tool_use blocks in same message are handled
+    - Multiple tool_result blocks in same message are handled
+    - skip_fields correctly filters metadata across all blocks
+    - Full realistic conversation flow works end-to-end
+    """
+    messages = [
+        {
+            "role": "user",
+            "content": "What's the weather in San Francisco and New York?"
+        },
+        {
+            "role": "assistant",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "I'll check the weather in both cities for you."
+                },
+                {
+                    "type": "tool_use",
+                    "id": "toolu_01A",
+                    "name": "get_weather",
+                    "input": {"location": "San Francisco, CA"}
+                },
+                {
+                    "type": "tool_use",
+                    "id": "toolu_01B",
+                    "name": "get_weather",
+                    "input": {"location": "New York, NY"}
+                }
+            ]
+        },
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "tool_result",
+                    "tool_use_id": "toolu_01A",
+                    "content": "San Francisco: 65°F, sunny"
+                },
+                {
+                    "type": "tool_result",
+                    "tool_use_id": "toolu_01B",
+                    "content": "New York: 45°F, cloudy"
+                }
+            ]
+        },
+        {
+            "role": "assistant",
+            "content": "The weather in San Francisco is 65°F and sunny, while New York is cooler at 45°F and cloudy."
+        }
+    ]
+
+    tokens = token_counter(model="gpt-3.5-turbo", messages=messages)
+    assert tokens > 0, f"Expected positive token count, got {tokens}"
+    # Should count all text, tool names, inputs, and results
+    assert tokens > 60, f"Expected substantial token count for full tool conversation, got {tokens}"
+
+
+def test_token_counter_with_image_url():
+    """
+    Test that _count_image_tokens() correctly handles image_url content blocks.
+
+    Validates that:
+    - image_url as dict with 'url' and 'detail' is handled
+    - image_url as string is handled
+    - 'detail' field validation works ('low', 'high', 'auto')
+    - calculate_img_tokens is called with correct parameters
+    """
+    # Test with dict format (detail: low)
+    messages_dict = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "What's in this image?"
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": "https://example.com/image.jpg",
+                        "detail": "low"  # Should use low token count (85 base tokens)
+                    }
+                }
+            ]
+        }
+    ]
+
+    tokens_dict = token_counter(
+        model="gpt-3.5-turbo",
+        messages=messages_dict,
+        use_default_image_token_count=True  # Avoid actual HTTP request
+    )
+    assert tokens_dict > 0, f"Expected positive token count, got {tokens_dict}"
+    assert tokens_dict > 85, f"Expected at least base image tokens, got {tokens_dict}"
+
+    # Test with string format (defaults to auto/low)
+    messages_str = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": "https://example.com/image.jpg"  # String format
+                }
+            ]
+        }
+    ]
+
+    tokens_str = token_counter(
+        model="gpt-3.5-turbo",
+        messages=messages_str,
+        use_default_image_token_count=True
+    )
+    assert tokens_str > 0, f"Expected positive token count for string image_url, got {tokens_str}"
+
+    # Test invalid detail value raises error
+    messages_invalid = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": "https://example.com/image.jpg",
+                        "detail": "invalid"  # Should raise ValueError
+                    }
+                }
+            ]
+        }
+    ]
+
+    try:
+        token_counter(model="gpt-3.5-turbo", messages=messages_invalid)
+        assert False, "Expected ValueError for invalid detail value"
+    except ValueError as e:
+        assert "Invalid detail value" in str(e), f"Expected detail validation error, got: {e}"
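
For reviewers: a minimal usage sketch of the new counting path through litellm's public token_counter API, mirroring the tests above. The model name and token totals are illustrative only; exact counts depend on the tokenizer.

    import litellm

    # Anthropic-style tool_use block: the new _count_anthropic_content() path
    # counts the "name" field and the serialized "input" dict, and skips
    # metadata such as "id".
    messages = [
        {"role": "user", "content": "What's the weather in San Francisco?"},
        {
            "role": "assistant",
            "content": [
                {
                    "type": "tool_use",
                    "id": "toolu_01234567890",
                    "name": "get_weather",
                    "input": {"location": "San Francisco, CA"},
                }
            ],
        },
    ]

    print(litellm.token_counter(model="gpt-3.5-turbo", messages=messages))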