From 61565901906594ded90be233679d8c70469a9585 Mon Sep 17 00:00:00 2001 From: mubashir1osmani Date: Sat, 13 Sep 2025 01:38:08 -0400 Subject: [PATCH 1/7] added spend metrics --- .../integrations/datadog/datadog_llm_obs.py | 48 +++++- litellm/types/integrations/datadog_llm_obs.py | 6 + .../datadog/test_datadog_llm_observability.py | 146 +++++++++++++++++- 3 files changed, 192 insertions(+), 8 deletions(-) diff --git a/litellm/integrations/datadog/datadog_llm_obs.py b/litellm/integrations/datadog/datadog_llm_obs.py index 2eaf13442e1a..457fafd75b4d 100644 --- a/litellm/integrations/datadog/datadog_llm_obs.py +++ b/litellm/integrations/datadog/datadog_llm_obs.py @@ -185,7 +185,6 @@ def create_llm_obs_payload( messages = standard_logging_payload["messages"] messages = self._ensure_string_content(messages=messages) - response_obj = standard_logging_payload.get("response") metadata = kwargs.get("litellm_params", {}).get("metadata", {}) @@ -495,6 +494,12 @@ def _get_dd_llm_obs_payload_metadata( latency_metrics = self._get_latency_metrics(standard_logging_payload) _metadata.update({"latency_metrics": dict(latency_metrics)}) + ######################################################### + # Add spend metrics to metadata + ######################################################### + spend_metrics = self._get_spend_metrics(standard_logging_payload) + _metadata.update({"spend_metrics": dict(spend_metrics)}) + ## extract tool calls and add to metadata tool_call_metadata = self._extract_tool_call_metadata(standard_logging_payload) _metadata.update(tool_call_metadata) @@ -543,6 +548,47 @@ def _get_latency_metrics( ) return latency_metrics + + def _get_spend_metrics( + self, standard_logging_payload: StandardLoggingPayload + ) -> DDLLMObsSpendMetrics: + """ + Get the spend metrics from the standard logging payload + """ + spend_metrics: DDLLMObsSpendMetrics = DDLLMObsSpendMetrics() + + # Get response cost for litellm_spend_metric + response_cost = standard_logging_payload.get("response_cost", 0.0) + if response_cost > 0: + spend_metrics["litellm_spend_metric"] = response_cost + + # Get budget information from metadata + metadata = standard_logging_payload.get("metadata", {}) + + # API key max budget + user_api_key_max_budget = metadata.get("user_api_key_max_budget") + if user_api_key_max_budget is not None: + spend_metrics["litellm_api_key_max_budget_metric"] = user_api_key_max_budget + + # API key budget remaining hours + user_api_key_budget_reset_at = metadata.get("user_api_key_budget_reset_at") + if user_api_key_budget_reset_at is not None: + try: + from datetime import datetime + if isinstance(user_api_key_budget_reset_at, str): + # Parse ISO string if it's a string + budget_reset_at = datetime.fromisoformat(user_api_key_budget_reset_at.replace('Z', '+00:00')) + else: + budget_reset_at = user_api_key_budget_reset_at + + remaining_hours = ( + budget_reset_at - datetime.now(budget_reset_at.tzinfo) + ).total_seconds() / 3600 + spend_metrics["litellm_api_key_budget_remaining_hours_metric"] = max(0, remaining_hours) + except Exception as e: + verbose_logger.debug(f"Error calculating remaining hours for budget reset: {e}") + + return spend_metrics def _process_input_messages_preserving_tool_calls( self, messages: List[Any] diff --git a/litellm/types/integrations/datadog_llm_obs.py b/litellm/types/integrations/datadog_llm_obs.py index 41489ace30f5..8b94dd7b5910 100644 --- a/litellm/types/integrations/datadog_llm_obs.py +++ b/litellm/types/integrations/datadog_llm_obs.py @@ -81,3 +81,9 @@ class 
DDLLMObsLatencyMetrics(TypedDict, total=False): time_to_first_token_ms: float litellm_overhead_time_ms: float guardrail_overhead_time_ms: float + +class DDLLMObsSpendMetrics(TypedDict, total=False): + litellm_spend_metric: float + litellm_api_key_max_budget_metric: float + litellm_remaining_api_key_budget_metric: float + litellm_api_key_budget_remaining_hours_metric: float diff --git a/tests/test_litellm/integrations/datadog/test_datadog_llm_observability.py b/tests/test_litellm/integrations/datadog/test_datadog_llm_observability.py index 39b8427fcf12..f167eb24b2ec 100644 --- a/tests/test_litellm/integrations/datadog/test_datadog_llm_observability.py +++ b/tests/test_litellm/integrations/datadog/test_datadog_llm_observability.py @@ -657,21 +657,109 @@ def test_guardrail_information_in_metadata(mock_env_vars): assert guardrail_info["guardrail_response"]["score"] == 0.1 +def create_standard_logging_payload_with_spend_metrics() -> StandardLoggingPayload: + """Create a StandardLoggingPayload object with spend metrics for testing""" + from datetime import datetime, timezone + + # Create a budget reset time 24 hours from now + budget_reset_at = datetime.now(timezone.utc) + timedelta(hours=24) + + return { + "id": "test-request-id-spend", + "trace_id": "test-trace-id-spend", + "call_type": "completion", + "stream": None, + "response_cost": 0.15, + "response_cost_failure_debug_info": None, + "status": "success", + "custom_llm_provider": "openai", + "total_tokens": 30, + "prompt_tokens": 10, + "completion_tokens": 20, + "startTime": 1234567890.0, + "endTime": 1234567891.0, + "completionStartTime": 1234567890.5, + "response_time": 1.0, + "model_map_information": { + "model_map_key": "gpt-4", + "model_map_value": None + }, + "model": "gpt-4", + "model_id": "model-123", + "model_group": "openai-gpt", + "api_base": "https://api.openai.com", + "metadata": { + "user_api_key_hash": "test_hash", + "user_api_key_org_id": None, + "user_api_key_alias": "test_alias", + "user_api_key_team_id": "test_team", + "user_api_key_user_id": "test_user", + "user_api_key_team_alias": "test_team_alias", + "user_api_key_user_email": None, + "user_api_key_end_user_id": None, + "user_api_key_request_route": None, + "user_api_key_max_budget": 10.0, # $10 max budget + "user_api_key_budget_reset_at": budget_reset_at.isoformat(), + "spend_logs_metadata": None, + "requester_ip_address": "127.0.0.1", + "requester_metadata": None, + "requester_custom_headers": None, + "prompt_management_metadata": None, + "mcp_tool_call_metadata": None, + "vector_store_request_metadata": None, + "applied_guardrails": None, + "usage_object": None, + "cold_storage_object_key": None, + }, + "cache_hit": False, + "cache_key": None, + "saved_cache_cost": 0.0, + "request_tags": [], + "end_user": None, + "requester_ip_address": "127.0.0.1", + "messages": [{"role": "user", "content": "Hello, world!"}], + "response": {"choices": [{"message": {"content": "Hi there!"}}]}, + "error_str": None, + "error_information": None, + "model_parameters": {"stream": False}, + "hidden_params": { + "model_id": "model-123", + "cache_key": None, + "api_base": "https://api.openai.com", + "response_cost": "0.15", + "litellm_overhead_time_ms": None, + "additional_headers": None, + "batch_models": None, + "litellm_model_name": None, + "usage_object": None, + }, + "guardrail_information": None, + "standard_built_in_tools_params": None, + } # type: ignore + + def create_standard_logging_payload_with_tool_calls() -> StandardLoggingPayload: """Create a StandardLoggingPayload object 
with tool calls for testing""" return { "id": "test-request-id-tool-calls", + "trace_id": "test-trace-id-tool-calls", "call_type": "completion", + "stream": None, "response_cost": 0.05, "response_cost_failure_debug_info": None, "status": "success", + "custom_llm_provider": "openai", "total_tokens": 50, "prompt_tokens": 20, "completion_tokens": 30, "startTime": 1234567890.0, "endTime": 1234567891.0, "completionStartTime": 1234567890.5, - "model_map_information": {"model_map_key": "gpt-4", "model_map_value": None}, + "response_time": 1.0, + "model_map_information": { + "model_map_key": "gpt-4", + "model_map_value": None + }, "model": "gpt-4", "model_id": "model-123", "model_group": "openai-gpt", @@ -746,6 +834,7 @@ def create_standard_logging_payload_with_tool_calls() -> StandardLoggingPayload: ] }, "error_str": None, + "error_information": None, "model_parameters": {"temperature": 0.7}, "hidden_params": { "model_id": "model-123", @@ -758,14 +847,9 @@ def create_standard_logging_payload_with_tool_calls() -> StandardLoggingPayload: "litellm_model_name": None, "usage_object": None, }, - "stream": None, - "response_time": 1.0, - "error_information": None, "guardrail_information": None, "standard_built_in_tools_params": None, - "trace_id": "test-trace-id-tool-calls", - "custom_llm_provider": "openai", - } + } # type: ignore class TestDataDogLLMObsLoggerToolCalls: @@ -897,3 +981,51 @@ def test_tool_call_response_handling(self, mock_env_vars): assert len(output_tool_calls) == 1 output_function_info = output_tool_calls[0].get("function", {}) assert output_function_info.get("name") == "format_response" + + +def test_spend_metrics_in_datadog_payload(mock_env_vars): + """Test that spend metrics are correctly included in DataDog LLM Observability payloads""" + with patch( + "litellm.integrations.datadog.datadog_llm_obs.get_async_httpx_client" + ), patch("asyncio.create_task"): + logger = DataDogLLMObsLogger() + + standard_payload = create_standard_logging_payload_with_spend_metrics() + + kwargs = { + "standard_logging_object": standard_payload, + "litellm_params": {"metadata": {}}, + } + + start_time = datetime.now() + end_time = datetime.now() + + payload = logger.create_llm_obs_payload(kwargs, start_time, end_time) + + # Verify basic payload structure + assert payload.get("name") == "litellm_llm_call" + assert payload.get("status") == "ok" + + # Verify spend metrics are included in metadata + meta = payload.get("meta", {}) + assert meta is not None, "Meta section should exist in payload" + + metadata = meta.get("metadata", {}) + assert metadata is not None, "Metadata section should exist in meta" + + spend_metrics = metadata.get("spend_metrics", {}) + assert spend_metrics, "Spend metrics should exist in metadata" + + # Check that all three spend metrics are present + assert "litellm_spend_metric" in spend_metrics + assert "litellm_api_key_max_budget_metric" in spend_metrics + assert "litellm_api_key_budget_remaining_hours_metric" in spend_metrics + + # Verify the values are correct + assert spend_metrics["litellm_spend_metric"] == 0.15 # response_cost + assert spend_metrics["litellm_api_key_max_budget_metric"] == 10.0 # max budget + + # Verify remaining hours is a reasonable value (should be close to 24 since we set it to 24 hours from now) + remaining_hours = spend_metrics["litellm_api_key_budget_remaining_hours_metric"] + assert isinstance(remaining_hours, (int, float)) + assert 20 <= remaining_hours <= 25 # Should be close to 24 hours From e694cc102a3f0050b2a0a7ea9da518d74b9cbe28 Mon Sep 17 
00:00:00 2001 From: mubashir1osmani Date: Sun, 14 Sep 2025 14:42:26 -0400 Subject: [PATCH 2/7] feat: Add Spend metrics in datadog --- .../integrations/datadog/datadog_llm_obs.py | 21 +- litellm/types/integrations/datadog_llm_obs.py | 3 +- .../datadog/test_datadog_llm_observability.py | 365 ++++++++++++------ 3 files changed, 264 insertions(+), 125 deletions(-) diff --git a/litellm/integrations/datadog/datadog_llm_obs.py b/litellm/integrations/datadog/datadog_llm_obs.py index 457fafd75b4d..574079908bcf 100644 --- a/litellm/integrations/datadog/datadog_llm_obs.py +++ b/litellm/integrations/datadog/datadog_llm_obs.py @@ -148,10 +148,27 @@ async def async_send_batch(self): ), ), } - verbose_logger.debug("payload %s", json.dumps(payload, indent=4)) + # serialize datetime objects - for budget reset time in spend metrics + import json + from datetime import datetime, date + + def custom_json_encoder(obj): + if isinstance(obj, (datetime, date)): + return obj.isoformat() + raise TypeError(f"Object of type {type(obj)} is not JSON serializable") + + # Serialize payload with custom encoder for debugging + try: + verbose_logger.debug("payload %s", json.dumps(payload, indent=4, default=custom_json_encoder)) + except Exception as debug_error: + verbose_logger.debug("payload serialization failed: %s", str(debug_error)) + + # Convert payload to JSON string with custom encoder for HTTP request + json_payload = json.dumps(payload, default=custom_json_encoder) + response = await self.async_client.post( url=self.intake_url, - json=payload, + content=json_payload, headers={ "DD-API-KEY": self.DD_API_KEY, "Content-Type": "application/json", diff --git a/litellm/types/integrations/datadog_llm_obs.py b/litellm/types/integrations/datadog_llm_obs.py index 8b94dd7b5910..85110191d272 100644 --- a/litellm/types/integrations/datadog_llm_obs.py +++ b/litellm/types/integrations/datadog_llm_obs.py @@ -85,5 +85,4 @@ class DDLLMObsLatencyMetrics(TypedDict, total=False): class DDLLMObsSpendMetrics(TypedDict, total=False): litellm_spend_metric: float litellm_api_key_max_budget_metric: float - litellm_remaining_api_key_budget_metric: float - litellm_api_key_budget_remaining_hours_metric: float + litellm_api_key_budget_remaining_hours_metric: float \ No newline at end of file diff --git a/tests/test_litellm/integrations/datadog/test_datadog_llm_observability.py b/tests/test_litellm/integrations/datadog/test_datadog_llm_observability.py index f167eb24b2ec..853eff7e6411 100644 --- a/tests/test_litellm/integrations/datadog/test_datadog_llm_observability.py +++ b/tests/test_litellm/integrations/datadog/test_datadog_llm_observability.py @@ -657,87 +657,6 @@ def test_guardrail_information_in_metadata(mock_env_vars): assert guardrail_info["guardrail_response"]["score"] == 0.1 -def create_standard_logging_payload_with_spend_metrics() -> StandardLoggingPayload: - """Create a StandardLoggingPayload object with spend metrics for testing""" - from datetime import datetime, timezone - - # Create a budget reset time 24 hours from now - budget_reset_at = datetime.now(timezone.utc) + timedelta(hours=24) - - return { - "id": "test-request-id-spend", - "trace_id": "test-trace-id-spend", - "call_type": "completion", - "stream": None, - "response_cost": 0.15, - "response_cost_failure_debug_info": None, - "status": "success", - "custom_llm_provider": "openai", - "total_tokens": 30, - "prompt_tokens": 10, - "completion_tokens": 20, - "startTime": 1234567890.0, - "endTime": 1234567891.0, - "completionStartTime": 1234567890.5, - "response_time": 1.0, - 
"model_map_information": { - "model_map_key": "gpt-4", - "model_map_value": None - }, - "model": "gpt-4", - "model_id": "model-123", - "model_group": "openai-gpt", - "api_base": "https://api.openai.com", - "metadata": { - "user_api_key_hash": "test_hash", - "user_api_key_org_id": None, - "user_api_key_alias": "test_alias", - "user_api_key_team_id": "test_team", - "user_api_key_user_id": "test_user", - "user_api_key_team_alias": "test_team_alias", - "user_api_key_user_email": None, - "user_api_key_end_user_id": None, - "user_api_key_request_route": None, - "user_api_key_max_budget": 10.0, # $10 max budget - "user_api_key_budget_reset_at": budget_reset_at.isoformat(), - "spend_logs_metadata": None, - "requester_ip_address": "127.0.0.1", - "requester_metadata": None, - "requester_custom_headers": None, - "prompt_management_metadata": None, - "mcp_tool_call_metadata": None, - "vector_store_request_metadata": None, - "applied_guardrails": None, - "usage_object": None, - "cold_storage_object_key": None, - }, - "cache_hit": False, - "cache_key": None, - "saved_cache_cost": 0.0, - "request_tags": [], - "end_user": None, - "requester_ip_address": "127.0.0.1", - "messages": [{"role": "user", "content": "Hello, world!"}], - "response": {"choices": [{"message": {"content": "Hi there!"}}]}, - "error_str": None, - "error_information": None, - "model_parameters": {"stream": False}, - "hidden_params": { - "model_id": "model-123", - "cache_key": None, - "api_base": "https://api.openai.com", - "response_cost": "0.15", - "litellm_overhead_time_ms": None, - "additional_headers": None, - "batch_models": None, - "litellm_model_name": None, - "usage_object": None, - }, - "guardrail_information": None, - "standard_built_in_tools_params": None, - } # type: ignore - - def create_standard_logging_payload_with_tool_calls() -> StandardLoggingPayload: """Create a StandardLoggingPayload object with tool calls for testing""" return { @@ -983,49 +902,253 @@ def test_tool_call_response_handling(self, mock_env_vars): assert output_function_info.get("name") == "format_response" -def test_spend_metrics_in_datadog_payload(mock_env_vars): +def create_standard_logging_payload() -> StandardLoggingPayload: + """Create a standard logging payload for testing""" + return { + "id": "test_id", + "trace_id": "test_trace_id", + "call_type": "completion", + "stream": False, + "response_cost": 0.1, + "response_cost_failure_debug_info": None, + "status": "success", + "custom_llm_provider": None, + "total_tokens": 30, + "prompt_tokens": 20, + "completion_tokens": 10, + "startTime": 1234567890.0, + "endTime": 1234567891.0, + "completionStartTime": 1234567890.5, + "response_time": 1.0, + "model_map_information": { + "model_map_key": "gpt-3.5-turbo", + "model_map_value": None + }, + "model": "gpt-3.5-turbo", + "model_id": "model-123", + "model_group": "openai-gpt", + "api_base": "https://api.openai.com", + "metadata": { + "user_api_key_hash": "test_hash", + "user_api_key_org_id": None, + "user_api_key_alias": "test_alias", + "user_api_key_team_id": "test_team", + "user_api_key_user_id": "test_user", + "user_api_key_team_alias": "test_team_alias", + "user_api_key_end_user_id": None, + "user_api_key_request_route": None, + "user_api_key_max_budget": None, + "user_api_key_budget_reset_at": None, + "user_api_key_user_email": None, + "spend_logs_metadata": None, + "requester_ip_address": "127.0.0.1", + "requester_metadata": None, + "requester_custom_headers": None, + "prompt_management_metadata": None, + "mcp_tool_call_metadata": None, + 
"vector_store_request_metadata": None, + "applied_guardrails": None, + "usage_object": None, + "cold_storage_object_key": None, + }, + "cache_hit": False, + "cache_key": None, + "saved_cache_cost": 0.0, + "request_tags": [], + "end_user": None, + "requester_ip_address": "127.0.0.1", + "messages": [{"role": "user", "content": "Hello, world!"}], + "response": {"choices": [{"message": {"content": "Hi there!"}}]}, + "error_str": None, + "model_parameters": {"stream": True}, + "hidden_params": { + "model_id": "model-123", + "cache_key": None, + "api_base": "https://api.openai.com", + "response_cost": "0.1", + "additional_headers": None, + "litellm_overhead_time_ms": None, + "batch_models": None, + "litellm_model_name": None, + "usage_object": None, + }, + "error_information": None, + "guardrail_information": None, + "standard_built_in_tools_params": None, + } # type: ignore + + +def create_standard_logging_payload_with_spend_metrics() -> StandardLoggingPayload: + """Create a StandardLoggingPayload object with spend metrics for testing""" + from datetime import datetime, timezone + + # Create a budget reset time 24 hours from now + budget_reset_at = datetime.now(timezone.utc) + timedelta(hours=24) + + return { + "id": "test-request-id-spend", + "trace_id": "test-trace-id-spend", + "call_type": "completion", + "stream": None, + "response_cost": 0.15, + "response_cost_failure_debug_info": None, + "status": "success", + "custom_llm_provider": "openai", + "total_tokens": 30, + "prompt_tokens": 10, + "completion_tokens": 20, + "startTime": 1234567890.0, + "endTime": 1234567891.0, + "completionStartTime": 1234567890.5, + "response_time": 1.0, + "model_map_information": { + "model_map_key": "gpt-4", + "model_map_value": None + }, + "model": "gpt-4", + "model_id": "model-123", + "model_group": "openai-gpt", + "api_base": "https://api.openai.com", + "metadata": { + "user_api_key_hash": "test_hash", + "user_api_key_org_id": None, + "user_api_key_alias": "test_alias", + "user_api_key_team_id": "test_team", + "user_api_key_user_id": "test_user", + "user_api_key_team_alias": "test_team_alias", + "user_api_key_user_email": None, + "user_api_key_end_user_id": None, + "user_api_key_request_route": None, + "user_api_key_max_budget": 10.0, # $10 max budget + "user_api_key_budget_reset_at": budget_reset_at.isoformat(), + "spend_logs_metadata": None, + "requester_ip_address": "127.0.0.1", + "requester_metadata": None, + "requester_custom_headers": None, + "prompt_management_metadata": None, + "mcp_tool_call_metadata": None, + "vector_store_request_metadata": None, + "applied_guardrails": None, + "usage_object": None, + "cold_storage_object_key": None, + }, + "cache_hit": False, + "cache_key": None, + "saved_cache_cost": 0.0, + "request_tags": [], + "end_user": None, + "requester_ip_address": "127.0.0.1", + "messages": [{"role": "user", "content": "Hello, world!"}], + "response": {"choices": [{"message": {"content": "Hi there!"}}]}, + "error_str": None, + "error_information": None, + "model_parameters": {"stream": False}, + "hidden_params": { + "model_id": "model-123", + "cache_key": None, + "api_base": "https://api.openai.com", + "response_cost": "0.15", + "litellm_overhead_time_ms": None, + "additional_headers": None, + "batch_models": None, + "litellm_model_name": None, + "usage_object": None, + }, + "guardrail_information": None, + "standard_built_in_tools_params": None, + } # type: ignore + + +@pytest.mark.asyncio +async def test_datadog_llm_obs_spend_metrics(): + """Test that budget metrics are properly 
extracted and logged""" + datadog_llm_obs_logger = DataDogLLMObsLogger() + + # Create a standard logging payload with budget metadata + payload = create_standard_logging_payload() + + # Add budget information to metadata + payload["metadata"]["user_api_key_max_budget"] = 10.0 + payload["metadata"]["user_api_key_budget_reset_at"] = "2025-09-15T00:00:00+00:00" + + # Test the _get_spend_metrics method + spend_metrics = datadog_llm_obs_logger._get_spend_metrics(payload) + + # Verify budget metrics are present + assert "litellm_api_key_max_budget_metric" in spend_metrics + assert spend_metrics["litellm_api_key_max_budget_metric"] == 10.0 + + assert "litellm_api_key_budget_remaining_hours_metric" in spend_metrics + # The remaining hours should be calculated based on the reset time + assert spend_metrics["litellm_api_key_budget_remaining_hours_metric"] >= 0 + + print(f"Spend metrics: {spend_metrics}") + + +@pytest.mark.asyncio +async def test_datadog_llm_obs_spend_metrics_no_budget(): + """Test that spend metrics work when no budget is set""" + datadog_llm_obs_logger = DataDogLLMObsLogger() + + # Create a standard logging payload without budget metadata + payload = create_standard_logging_payload() + + # Test the _get_spend_metrics method + spend_metrics = datadog_llm_obs_logger._get_spend_metrics(payload) + + # Verify only response cost is present + assert "litellm_spend_metric" in spend_metrics + assert spend_metrics["litellm_spend_metric"] == 0.1 + + # Budget metrics should not be present + assert "litellm_api_key_max_budget_metric" not in spend_metrics + assert "litellm_api_key_budget_remaining_hours_metric" not in spend_metrics + + print(f"Spend metrics (no budget): {spend_metrics}") + + +@pytest.mark.asyncio +async def test_spend_metrics_in_datadog_payload(): """Test that spend metrics are correctly included in DataDog LLM Observability payloads""" - with patch( - "litellm.integrations.datadog.datadog_llm_obs.get_async_httpx_client" - ), patch("asyncio.create_task"): - logger = DataDogLLMObsLogger() + datadog_llm_obs_logger = DataDogLLMObsLogger() - standard_payload = create_standard_logging_payload_with_spend_metrics() + standard_payload = create_standard_logging_payload_with_spend_metrics() - kwargs = { - "standard_logging_object": standard_payload, - "litellm_params": {"metadata": {}}, - } + kwargs = { + "standard_logging_object": standard_payload, + "litellm_params": {"metadata": {}}, + } - start_time = datetime.now() - end_time = datetime.now() + start_time = datetime.now() + end_time = datetime.now() - payload = logger.create_llm_obs_payload(kwargs, start_time, end_time) + payload = datadog_llm_obs_logger.create_llm_obs_payload(kwargs, start_time, end_time) + + # Verify basic payload structure + assert payload.get("name") == "litellm_llm_call" + assert payload.get("status") == "ok" + + # Verify spend metrics are included in metadata + meta = payload.get("meta", {}) + assert meta is not None, "Meta section should exist in payload" + + metadata = meta.get("metadata", {}) + assert metadata is not None, "Metadata section should exist in meta" + + spend_metrics = metadata.get("spend_metrics", {}) + assert spend_metrics, "Spend metrics should exist in metadata" + + # Check that all three spend metrics are present + assert "litellm_spend_metric" in spend_metrics + assert "litellm_api_key_max_budget_metric" in spend_metrics + assert "litellm_api_key_budget_remaining_hours_metric" in spend_metrics + + # Verify the values are correct + assert spend_metrics["litellm_spend_metric"] == 0.15 # 
response_cost + assert spend_metrics["litellm_api_key_max_budget_metric"] == 10.0 # max budget + + # Verify remaining hours is a reasonable value (should be close to 24 since we set it to 24 hours from now) + remaining_hours = spend_metrics["litellm_api_key_budget_remaining_hours_metric"] + assert isinstance(remaining_hours, (int, float)) + assert 20 <= remaining_hours <= 25 # Should be close to 24 hours - # Verify basic payload structure - assert payload.get("name") == "litellm_llm_call" - assert payload.get("status") == "ok" - - # Verify spend metrics are included in metadata - meta = payload.get("meta", {}) - assert meta is not None, "Meta section should exist in payload" - - metadata = meta.get("metadata", {}) - assert metadata is not None, "Metadata section should exist in meta" - - spend_metrics = metadata.get("spend_metrics", {}) - assert spend_metrics, "Spend metrics should exist in metadata" - - # Check that all three spend metrics are present - assert "litellm_spend_metric" in spend_metrics - assert "litellm_api_key_max_budget_metric" in spend_metrics - assert "litellm_api_key_budget_remaining_hours_metric" in spend_metrics - - # Verify the values are correct - assert spend_metrics["litellm_spend_metric"] == 0.15 # response_cost - assert spend_metrics["litellm_api_key_max_budget_metric"] == 10.0 # max budget - - # Verify remaining hours is a reasonable value (should be close to 24 since we set it to 24 hours from now) - remaining_hours = spend_metrics["litellm_api_key_budget_remaining_hours_metric"] - assert isinstance(remaining_hours, (int, float)) - assert 20 <= remaining_hours <= 25 # Should be close to 24 hours From e123cae06e32e6d003c0e827b5620bc7710c8fdb Mon Sep 17 00:00:00 2001 From: mubashir1osmani Date: Sun, 14 Sep 2025 14:55:47 -0400 Subject: [PATCH 3/7] fix: lint errors --- litellm/integrations/datadog/datadog_llm_obs.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/litellm/integrations/datadog/datadog_llm_obs.py b/litellm/integrations/datadog/datadog_llm_obs.py index 574079908bcf..f7a48e870452 100644 --- a/litellm/integrations/datadog/datadog_llm_obs.py +++ b/litellm/integrations/datadog/datadog_llm_obs.py @@ -585,18 +585,29 @@ def _get_spend_metrics( # API key max budget user_api_key_max_budget = metadata.get("user_api_key_max_budget") if user_api_key_max_budget is not None: - spend_metrics["litellm_api_key_max_budget_metric"] = user_api_key_max_budget + # type casting to make sure its a float value + try: + if isinstance(user_api_key_max_budget, (int, float)): + spend_metrics["litellm_api_key_max_budget_metric"] = float(user_api_key_max_budget) + elif isinstance(user_api_key_max_budget, str): + spend_metrics["litellm_api_key_max_budget_metric"] = float(user_api_key_max_budget) + except (ValueError, TypeError): + verbose_logger.debug(f"Invalid user_api_key_max_budget value: {user_api_key_max_budget}") # API key budget remaining hours user_api_key_budget_reset_at = metadata.get("user_api_key_budget_reset_at") if user_api_key_budget_reset_at is not None: try: from datetime import datetime + budget_reset_at: datetime if isinstance(user_api_key_budget_reset_at, str): # Parse ISO string if it's a string budget_reset_at = datetime.fromisoformat(user_api_key_budget_reset_at.replace('Z', '+00:00')) - else: + elif isinstance(user_api_key_budget_reset_at, datetime): budget_reset_at = user_api_key_budget_reset_at + else: + verbose_logger.debug(f"Invalid user_api_key_budget_reset_at type: {type(user_api_key_budget_reset_at)}") + return 
spend_metrics remaining_hours = ( budget_reset_at - datetime.now(budget_reset_at.tzinfo) From f5f00043cebf3d56f58218eb35df7594a48065b5 Mon Sep 17 00:00:00 2001 From: mubashir1osmani Date: Tue, 16 Sep 2025 00:14:48 -0400 Subject: [PATCH 4/7] fixed tests --- .../integrations/datadog/test_datadog_llm_observability.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_litellm/integrations/datadog/test_datadog_llm_observability.py b/tests/test_litellm/integrations/datadog/test_datadog_llm_observability.py index 853eff7e6411..08ee29bb60f7 100644 --- a/tests/test_litellm/integrations/datadog/test_datadog_llm_observability.py +++ b/tests/test_litellm/integrations/datadog/test_datadog_llm_observability.py @@ -1060,7 +1060,7 @@ def create_standard_logging_payload_with_spend_metrics() -> StandardLoggingPaylo @pytest.mark.asyncio -async def test_datadog_llm_obs_spend_metrics(): +async def test_datadog_llm_obs_spend_metrics(mock_env_vars): """Test that budget metrics are properly extracted and logged""" datadog_llm_obs_logger = DataDogLLMObsLogger() @@ -1086,7 +1086,7 @@ async def test_datadog_llm_obs_spend_metrics(): @pytest.mark.asyncio -async def test_datadog_llm_obs_spend_metrics_no_budget(): +async def test_datadog_llm_obs_spend_metrics_no_budget(mock_env_vars): """Test that spend metrics work when no budget is set""" datadog_llm_obs_logger = DataDogLLMObsLogger() @@ -1108,7 +1108,7 @@ async def test_datadog_llm_obs_spend_metrics_no_budget(): @pytest.mark.asyncio -async def test_spend_metrics_in_datadog_payload(): +async def test_spend_metrics_in_datadog_payload(mock_env_vars): """Test that spend metrics are correctly included in DataDog LLM Observability payloads""" datadog_llm_obs_logger = DataDogLLMObsLogger() From fc7de7a1c5bf016414033ee84ba5ee829ead2069 Mon Sep 17 00:00:00 2001 From: mubashir1osmani Date: Wed, 17 Sep 2025 00:10:02 -0400 Subject: [PATCH 5/7] added user_api_key_spend --- .../integrations/datadog/datadog_llm_obs.py | 73 +++++++++---------- litellm/proxy/litellm_pre_call_utils.py | 1 + litellm/types/integrations/datadog_llm_obs.py | 7 +- litellm/types/utils.py | 3 + 4 files changed, 44 insertions(+), 40 deletions(-) diff --git a/litellm/integrations/datadog/datadog_llm_obs.py b/litellm/integrations/datadog/datadog_llm_obs.py index f7a48e870452..588309191c37 100644 --- a/litellm/integrations/datadog/datadog_llm_obs.py +++ b/litellm/integrations/datadog/datadog_llm_obs.py @@ -148,23 +148,16 @@ async def async_send_batch(self): ), ), } - # serialize datetime objects - for budget reset time in spend metrics - import json - from datetime import datetime, date - def custom_json_encoder(obj): - if isinstance(obj, (datetime, date)): - return obj.isoformat() - raise TypeError(f"Object of type {type(obj)} is not JSON serializable") + # serialize datetime objects - for budget reset time in spend metrics + from litellm.litellm_core_utils.safe_json_dumps import safe_dumps - # Serialize payload with custom encoder for debugging try: - verbose_logger.debug("payload %s", json.dumps(payload, indent=4, default=custom_json_encoder)) + verbose_logger.debug("payload %s", safe_dumps(payload)) except Exception as debug_error: verbose_logger.debug("payload serialization failed: %s", str(debug_error)) - # Convert payload to JSON string with custom encoder for HTTP request - json_payload = json.dumps(payload, default=custom_json_encoder) + json_payload = safe_dumps(payload) response = await self.async_client.post( url=self.intake_url, @@ -338,7 +331,6 @@ def 
_get_response_messages( if isinstance(response_obj, str): try: import ast - response_obj = ast.literal_eval(response_obj) except (ValueError, SyntaxError): try: @@ -573,11 +565,9 @@ def _get_spend_metrics( Get the spend metrics from the standard logging payload """ spend_metrics: DDLLMObsSpendMetrics = DDLLMObsSpendMetrics() - - # Get response cost for litellm_spend_metric - response_cost = standard_logging_payload.get("response_cost", 0.0) - if response_cost > 0: - spend_metrics["litellm_spend_metric"] = response_cost + + # send response cost + spend_metrics["response_cost"] = standard_logging_payload.get("response_cost", 0.0) # Get budget information from metadata metadata = standard_logging_payload.get("metadata", {}) @@ -585,36 +575,45 @@ def _get_spend_metrics( # API key max budget user_api_key_max_budget = metadata.get("user_api_key_max_budget") if user_api_key_max_budget is not None: - # type casting to make sure its a float value + spend_metrics["user_api_key_max_budget"] = float(user_api_key_max_budget) + + # API key spend + user_api_key_spend = metadata.get("user_api_key_spend") + if user_api_key_spend is not None: try: - if isinstance(user_api_key_max_budget, (int, float)): - spend_metrics["litellm_api_key_max_budget_metric"] = float(user_api_key_max_budget) - elif isinstance(user_api_key_max_budget, str): - spend_metrics["litellm_api_key_max_budget_metric"] = float(user_api_key_max_budget) + spend_metrics["user_api_key_spend"] = float(user_api_key_spend) except (ValueError, TypeError): - verbose_logger.debug(f"Invalid user_api_key_max_budget value: {user_api_key_max_budget}") + verbose_logger.debug(f"Invalid user_api_key_spend value: {user_api_key_spend}") - # API key budget remaining hours + # API key budget reset datetime user_api_key_budget_reset_at = metadata.get("user_api_key_budget_reset_at") if user_api_key_budget_reset_at is not None: try: - from datetime import datetime - budget_reset_at: datetime + from datetime import datetime, timezone + + budget_reset_at = None if isinstance(user_api_key_budget_reset_at, str): - # Parse ISO string if it's a string - budget_reset_at = datetime.fromisoformat(user_api_key_budget_reset_at.replace('Z', '+00:00')) + # Handle ISO format strings that might have 'Z' suffix + iso_string = user_api_key_budget_reset_at.replace('Z', '+00:00') + budget_reset_at = datetime.fromisoformat(iso_string) elif isinstance(user_api_key_budget_reset_at, datetime): budget_reset_at = user_api_key_budget_reset_at - else: - verbose_logger.debug(f"Invalid user_api_key_budget_reset_at type: {type(user_api_key_budget_reset_at)}") - return spend_metrics - - remaining_hours = ( - budget_reset_at - datetime.now(budget_reset_at.tzinfo) - ).total_seconds() / 3600 - spend_metrics["litellm_api_key_budget_remaining_hours_metric"] = max(0, remaining_hours) + + if budget_reset_at is not None: + # Preserve timezone info if already present + if budget_reset_at.tzinfo is None: + budget_reset_at = budget_reset_at.replace(tzinfo=timezone.utc) + + # Convert to ISO string format for JSON serialization + # This prevents circular reference issues and ensures proper timezone representation + iso_string = budget_reset_at.isoformat() + spend_metrics["user_api_key_budget_reset_at"] = iso_string + + # Debug logging to verify the conversion + verbose_logger.debug(f"Converted budget_reset_at to ISO format: {iso_string}") except Exception as e: - verbose_logger.debug(f"Error calculating remaining hours for budget reset: {e}") + verbose_logger.debug(f"Error processing budget reset datetime: 
{e}") + verbose_logger.debug(f"Original value: {user_api_key_budget_reset_at}") return spend_metrics diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index e82c8b229567..020cd3e8cde2 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -546,6 +546,7 @@ def get_sanitized_user_information_from_key( user_api_key_end_user_id=user_api_key_dict.end_user_id, user_api_key_user_email=user_api_key_dict.user_email, user_api_key_request_route=user_api_key_dict.request_route, + user_api_key_budget_reset_at=user_api_key_dict.budget_reset_at, ) return user_api_key_logged_metadata diff --git a/litellm/types/integrations/datadog_llm_obs.py b/litellm/types/integrations/datadog_llm_obs.py index 85110191d272..ed80a1add5c9 100644 --- a/litellm/types/integrations/datadog_llm_obs.py +++ b/litellm/types/integrations/datadog_llm_obs.py @@ -83,6 +83,7 @@ class DDLLMObsLatencyMetrics(TypedDict, total=False): guardrail_overhead_time_ms: float class DDLLMObsSpendMetrics(TypedDict, total=False): - litellm_spend_metric: float - litellm_api_key_max_budget_metric: float - litellm_api_key_budget_remaining_hours_metric: float \ No newline at end of file + response_cost: float + user_api_key_spend: float + user_api_key_max_budget: float + user_api_key_budget_reset_at: str \ No newline at end of file diff --git a/litellm/types/utils.py b/litellm/types/utils.py index e54516371e52..0abce2bcd2dc 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -1807,6 +1807,9 @@ async def __anext__(self): class StandardLoggingUserAPIKeyMetadata(TypedDict): user_api_key_hash: Optional[str] # hash of the litellm virtual key used user_api_key_alias: Optional[str] + user_api_key_spend: Optional[float] + user_api_key_max_budget: Optional[float] = None + user_api_key_budget_reset_at: Optional[str] = None user_api_key_org_id: Optional[str] user_api_key_team_id: Optional[str] user_api_key_user_id: Optional[str] From e56b11cb544b518cfacbabff752725779bc783b2 Mon Sep 17 00:00:00 2001 From: mubashir1osmani Date: Wed, 17 Sep 2025 00:19:03 -0400 Subject: [PATCH 6/7] added tests --- .../integrations/datadog/datadog_llm_obs.py | 35 ++-- litellm/types/integrations/datadog_llm_obs.py | 3 +- .../datadog/test_datadog_llm_observability.py | 167 ++++++------------ 3 files changed, 82 insertions(+), 123 deletions(-) diff --git a/litellm/integrations/datadog/datadog_llm_obs.py b/litellm/integrations/datadog/datadog_llm_obs.py index 588309191c37..7ab82eb78478 100644 --- a/litellm/integrations/datadog/datadog_llm_obs.py +++ b/litellm/integrations/datadog/datadog_llm_obs.py @@ -148,17 +148,19 @@ async def async_send_batch(self): ), ), } - + # serialize datetime objects - for budget reset time in spend metrics from litellm.litellm_core_utils.safe_json_dumps import safe_dumps - + try: verbose_logger.debug("payload %s", safe_dumps(payload)) except Exception as debug_error: - verbose_logger.debug("payload serialization failed: %s", str(debug_error)) - + verbose_logger.debug( + "payload serialization failed: %s", str(debug_error) + ) + json_payload = safe_dumps(payload) - + response = await self.async_client.post( url=self.intake_url, content=json_payload, @@ -331,6 +333,7 @@ def _get_response_messages( if isinstance(response_obj, str): try: import ast + response_obj = ast.literal_eval(response_obj) except (ValueError, SyntaxError): try: @@ -557,9 +560,9 @@ def _get_latency_metrics( ) return latency_metrics - + def _get_spend_metrics( - self, 
standard_logging_payload: StandardLoggingPayload + self, standard_logging_payload: StandardLoggingPayload ) -> DDLLMObsSpendMetrics: """ Get the spend metrics from the standard logging payload @@ -567,11 +570,13 @@ def _get_spend_metrics( spend_metrics: DDLLMObsSpendMetrics = DDLLMObsSpendMetrics() # send response cost - spend_metrics["response_cost"] = standard_logging_payload.get("response_cost", 0.0) + spend_metrics["response_cost"] = standard_logging_payload.get( + "response_cost", 0.0 + ) # Get budget information from metadata metadata = standard_logging_payload.get("metadata", {}) - + # API key max budget user_api_key_max_budget = metadata.get("user_api_key_max_budget") if user_api_key_max_budget is not None: @@ -583,7 +588,9 @@ def _get_spend_metrics( try: spend_metrics["user_api_key_spend"] = float(user_api_key_spend) except (ValueError, TypeError): - verbose_logger.debug(f"Invalid user_api_key_spend value: {user_api_key_spend}") + verbose_logger.debug( + f"Invalid user_api_key_spend value: {user_api_key_spend}" + ) # API key budget reset datetime user_api_key_budget_reset_at = metadata.get("user_api_key_budget_reset_at") @@ -594,7 +601,7 @@ def _get_spend_metrics( budget_reset_at = None if isinstance(user_api_key_budget_reset_at, str): # Handle ISO format strings that might have 'Z' suffix - iso_string = user_api_key_budget_reset_at.replace('Z', '+00:00') + iso_string = user_api_key_budget_reset_at.replace("Z", "+00:00") budget_reset_at = datetime.fromisoformat(iso_string) elif isinstance(user_api_key_budget_reset_at, datetime): budget_reset_at = user_api_key_budget_reset_at @@ -608,9 +615,11 @@ def _get_spend_metrics( # This prevents circular reference issues and ensures proper timezone representation iso_string = budget_reset_at.isoformat() spend_metrics["user_api_key_budget_reset_at"] = iso_string - + # Debug logging to verify the conversion - verbose_logger.debug(f"Converted budget_reset_at to ISO format: {iso_string}") + verbose_logger.debug( + f"Converted budget_reset_at to ISO format: {iso_string}" + ) except Exception as e: verbose_logger.debug(f"Error processing budget reset datetime: {e}") verbose_logger.debug(f"Original value: {user_api_key_budget_reset_at}") diff --git a/litellm/types/integrations/datadog_llm_obs.py b/litellm/types/integrations/datadog_llm_obs.py index ed80a1add5c9..47ffbb0bbe66 100644 --- a/litellm/types/integrations/datadog_llm_obs.py +++ b/litellm/types/integrations/datadog_llm_obs.py @@ -82,8 +82,9 @@ class DDLLMObsLatencyMetrics(TypedDict, total=False): litellm_overhead_time_ms: float guardrail_overhead_time_ms: float + class DDLLMObsSpendMetrics(TypedDict, total=False): response_cost: float user_api_key_spend: float user_api_key_max_budget: float - user_api_key_budget_reset_at: str \ No newline at end of file + user_api_key_budget_reset_at: str diff --git a/tests/test_litellm/integrations/datadog/test_datadog_llm_observability.py b/tests/test_litellm/integrations/datadog/test_datadog_llm_observability.py index 08ee29bb60f7..e715fec4ffd9 100644 --- a/tests/test_litellm/integrations/datadog/test_datadog_llm_observability.py +++ b/tests/test_litellm/integrations/datadog/test_datadog_llm_observability.py @@ -1,7 +1,7 @@ import asyncio import os import sys -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from typing import Optional from unittest.mock import Mock, patch, MagicMock @@ -901,89 +901,12 @@ def test_tool_call_response_handling(self, mock_env_vars): output_function_info = 
output_tool_calls[0].get("function", {}) assert output_function_info.get("name") == "format_response" - -def create_standard_logging_payload() -> StandardLoggingPayload: - """Create a standard logging payload for testing""" - return { - "id": "test_id", - "trace_id": "test_trace_id", - "call_type": "completion", - "stream": False, - "response_cost": 0.1, - "response_cost_failure_debug_info": None, - "status": "success", - "custom_llm_provider": None, - "total_tokens": 30, - "prompt_tokens": 20, - "completion_tokens": 10, - "startTime": 1234567890.0, - "endTime": 1234567891.0, - "completionStartTime": 1234567890.5, - "response_time": 1.0, - "model_map_information": { - "model_map_key": "gpt-3.5-turbo", - "model_map_value": None - }, - "model": "gpt-3.5-turbo", - "model_id": "model-123", - "model_group": "openai-gpt", - "api_base": "https://api.openai.com", - "metadata": { - "user_api_key_hash": "test_hash", - "user_api_key_org_id": None, - "user_api_key_alias": "test_alias", - "user_api_key_team_id": "test_team", - "user_api_key_user_id": "test_user", - "user_api_key_team_alias": "test_team_alias", - "user_api_key_end_user_id": None, - "user_api_key_request_route": None, - "user_api_key_max_budget": None, - "user_api_key_budget_reset_at": None, - "user_api_key_user_email": None, - "spend_logs_metadata": None, - "requester_ip_address": "127.0.0.1", - "requester_metadata": None, - "requester_custom_headers": None, - "prompt_management_metadata": None, - "mcp_tool_call_metadata": None, - "vector_store_request_metadata": None, - "applied_guardrails": None, - "usage_object": None, - "cold_storage_object_key": None, - }, - "cache_hit": False, - "cache_key": None, - "saved_cache_cost": 0.0, - "request_tags": [], - "end_user": None, - "requester_ip_address": "127.0.0.1", - "messages": [{"role": "user", "content": "Hello, world!"}], - "response": {"choices": [{"message": {"content": "Hi there!"}}]}, - "error_str": None, - "model_parameters": {"stream": True}, - "hidden_params": { - "model_id": "model-123", - "cache_key": None, - "api_base": "https://api.openai.com", - "response_cost": "0.1", - "additional_headers": None, - "litellm_overhead_time_ms": None, - "batch_models": None, - "litellm_model_name": None, - "usage_object": None, - }, - "error_information": None, - "guardrail_information": None, - "standard_built_in_tools_params": None, - } # type: ignore - - def create_standard_logging_payload_with_spend_metrics() -> StandardLoggingPayload: """Create a StandardLoggingPayload object with spend metrics for testing""" from datetime import datetime, timezone - # Create a budget reset time 24 hours from now - budget_reset_at = datetime.now(timezone.utc) + timedelta(hours=24) + # Create a budget reset time 10 days from now (using "10d" format) + budget_reset_at = datetime.now(timezone.utc) + timedelta(days=10) return { "id": "test-request-id-spend", @@ -1019,8 +942,9 @@ def create_standard_logging_payload_with_spend_metrics() -> StandardLoggingPaylo "user_api_key_user_email": None, "user_api_key_end_user_id": None, "user_api_key_request_route": None, + "user_api_key_spend": 0.67, "user_api_key_max_budget": 10.0, # $10 max budget - "user_api_key_budget_reset_at": budget_reset_at.isoformat(), + "user_api_key_budget_reset_at": budget_reset_at.isoformat(), # ISO format: 2025-09-26T... 
"spend_logs_metadata": None, "requester_ip_address": "127.0.0.1", "requester_metadata": None, @@ -1064,23 +988,32 @@ async def test_datadog_llm_obs_spend_metrics(mock_env_vars): """Test that budget metrics are properly extracted and logged""" datadog_llm_obs_logger = DataDogLLMObsLogger() - # Create a standard logging payload with budget metadata - payload = create_standard_logging_payload() + # Create a standard logging payload with spend metrics + payload = create_standard_logging_payload_with_spend_metrics() - # Add budget information to metadata - payload["metadata"]["user_api_key_max_budget"] = 10.0 - payload["metadata"]["user_api_key_budget_reset_at"] = "2025-09-15T00:00:00+00:00" + # Show the budget reset time in ISO format + budget_reset_iso = payload["metadata"]["user_api_key_budget_reset_at"] + print(f"Budget reset time (ISO format): {budget_reset_iso}") + from datetime import datetime, timezone + print(f"Current time: {datetime.now(timezone.utc).isoformat()}") # Test the _get_spend_metrics method spend_metrics = datadog_llm_obs_logger._get_spend_metrics(payload) # Verify budget metrics are present - assert "litellm_api_key_max_budget_metric" in spend_metrics - assert spend_metrics["litellm_api_key_max_budget_metric"] == 10.0 - - assert "litellm_api_key_budget_remaining_hours_metric" in spend_metrics - # The remaining hours should be calculated based on the reset time - assert spend_metrics["litellm_api_key_budget_remaining_hours_metric"] >= 0 + assert "user_api_key_max_budget" in spend_metrics + assert spend_metrics["user_api_key_max_budget"] == 10.0 + + assert "user_api_key_budget_reset_at" in spend_metrics + # The budget reset should be a datetime string in ISO format + budget_reset = spend_metrics["user_api_key_budget_reset_at"] + assert isinstance(budget_reset, str) + print(f"Budget reset datetime: {budget_reset}") + # Should be close to 10 days from now + budget_reset_dt = datetime.fromisoformat(budget_reset.replace('Z', '+00:00')) + now = datetime.now(timezone.utc) + time_diff = (budget_reset_dt - now).total_seconds() / 86400 # days + assert 9.5 <= time_diff <= 10.5 # Should be close to 10 days print(f"Spend metrics: {spend_metrics}") @@ -1091,18 +1024,22 @@ async def test_datadog_llm_obs_spend_metrics_no_budget(mock_env_vars): datadog_llm_obs_logger = DataDogLLMObsLogger() # Create a standard logging payload without budget metadata - payload = create_standard_logging_payload() + payload = create_standard_logging_payload_with_spend_metrics() + + # Remove budget-related metadata to test no-budget scenario + payload["metadata"].pop("user_api_key_max_budget", None) + payload["metadata"].pop("user_api_key_budget_reset_at", None) # Test the _get_spend_metrics method spend_metrics = datadog_llm_obs_logger._get_spend_metrics(payload) # Verify only response cost is present - assert "litellm_spend_metric" in spend_metrics - assert spend_metrics["litellm_spend_metric"] == 0.1 + assert "response_cost" in spend_metrics + assert spend_metrics["response_cost"] == 0.15 # Budget metrics should not be present - assert "litellm_api_key_max_budget_metric" not in spend_metrics - assert "litellm_api_key_budget_remaining_hours_metric" not in spend_metrics + assert "user_api_key_max_budget" not in spend_metrics + assert "user_api_key_budget_reset_at" not in spend_metrics print(f"Spend metrics (no budget): {spend_metrics}") @@ -1110,6 +1047,7 @@ async def test_datadog_llm_obs_spend_metrics_no_budget(mock_env_vars): @pytest.mark.asyncio async def 
test_spend_metrics_in_datadog_payload(mock_env_vars): """Test that spend metrics are correctly included in DataDog LLM Observability payloads""" + from datetime import datetime datadog_llm_obs_logger = DataDogLLMObsLogger() standard_payload = create_standard_logging_payload_with_spend_metrics() @@ -1138,17 +1076,28 @@ async def test_spend_metrics_in_datadog_payload(mock_env_vars): spend_metrics = metadata.get("spend_metrics", {}) assert spend_metrics, "Spend metrics should exist in metadata" - # Check that all three spend metrics are present - assert "litellm_spend_metric" in spend_metrics - assert "litellm_api_key_max_budget_metric" in spend_metrics - assert "litellm_api_key_budget_remaining_hours_metric" in spend_metrics + # Check that all metrics are present + assert "response_cost" in spend_metrics + assert "user_api_key_spend" in spend_metrics + assert "user_api_key_max_budget" in spend_metrics + assert "user_api_key_budget_reset_at" in spend_metrics # Verify the values are correct - assert spend_metrics["litellm_spend_metric"] == 0.15 # response_cost - assert spend_metrics["litellm_api_key_max_budget_metric"] == 10.0 # max budget - - # Verify remaining hours is a reasonable value (should be close to 24 since we set it to 24 hours from now) - remaining_hours = spend_metrics["litellm_api_key_budget_remaining_hours_metric"] - assert isinstance(remaining_hours, (int, float)) - assert 20 <= remaining_hours <= 25 # Should be close to 24 hours - + assert spend_metrics["response_cost"] == 0.15 # response_cost + assert spend_metrics["user_api_key_spend"] == 0.67 # lol + assert spend_metrics["user_api_key_max_budget"] == 10.0 # max budget + + # Verify budget reset is a datetime string in ISO format + budget_reset = spend_metrics["user_api_key_budget_reset_at"] + assert isinstance(budget_reset, str) + print(f"Budget reset in payload: {budget_reset}") # In StandardLoggingUserAPIKeyMetadata + user_api_key_budget_reset_at: Optional[str] = None + + # In DDLLMObsSpendMetrics + user_api_key_budget_reset_at: str + # Should be close to 10 days from now + from datetime import datetime, timezone + budget_reset_dt = datetime.fromisoformat(budget_reset.replace('Z', '+00:00')) + now = datetime.now(timezone.utc) + time_diff = (budget_reset_dt - now).total_seconds() / 86400 # days + assert 9.5 <= time_diff <= 10.5 # Should be close to 10 days From 439577fd35a2eb8ebe29caefc69509a0576c0ea4 Mon Sep 17 00:00:00 2001 From: mubashir1osmani Date: Wed, 17 Sep 2025 00:31:29 -0400 Subject: [PATCH 7/7] fixed lint errors --- litellm/litellm_core_utils/litellm_logging.py | 315 +++++++++--------- litellm/proxy/litellm_pre_call_utils.py | 74 ++-- .../pass_through_endpoints.py | 24 +- litellm/types/utils.py | 10 +- 4 files changed, 225 insertions(+), 198 deletions(-) diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 19d7c5512ba6..ae2e0867eeac 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -299,9 +299,9 @@ def __init__( self.litellm_trace_id: str = litellm_trace_id or str(uuid.uuid4()) self.function_id = function_id self.streaming_chunks: List[Any] = [] # for generating complete stream response - self.sync_streaming_chunks: List[Any] = ( - [] - ) # for generating complete stream response + self.sync_streaming_chunks: List[ + Any + ] = [] # for generating complete stream response self.log_raw_request_response = log_raw_request_response # Initialize dynamic callbacks @@ -670,24 +670,23 @@ def 
get_custom_logger_for_prompt_management( if anthropic_cache_control_logger := AnthropicCacheControlHook.get_custom_logger_for_anthropic_cache_control_hook( non_default_params ): - self.model_call_details["prompt_integration"] = ( - anthropic_cache_control_logger.__class__.__name__ - ) + self.model_call_details[ + "prompt_integration" + ] = anthropic_cache_control_logger.__class__.__name__ return anthropic_cache_control_logger ######################################################### # Vector Store / Knowledge Base hooks ######################################################### if litellm.vector_store_registry is not None: - vector_store_custom_logger = _init_custom_logger_compatible_class( logging_integration="vector_store_pre_call_hook", internal_usage_cache=None, llm_router=None, ) - self.model_call_details["prompt_integration"] = ( - vector_store_custom_logger.__class__.__name__ - ) + self.model_call_details[ + "prompt_integration" + ] = vector_store_custom_logger.__class__.__name__ return vector_store_custom_logger return None @@ -739,9 +738,9 @@ def _pre_call(self, input, api_key, model=None, additional_args={}): model ): # if model name was changes pre-call, overwrite the initial model call name with the new one self.model_call_details["model"] = model - self.model_call_details["litellm_params"]["api_base"] = ( - self._get_masked_api_base(additional_args.get("api_base", "")) - ) + self.model_call_details["litellm_params"][ + "api_base" + ] = self._get_masked_api_base(additional_args.get("api_base", "")) def pre_call(self, input, api_key, model=None, additional_args={}): # noqa: PLR0915 # Log the exact input to the LLM API @@ -770,10 +769,10 @@ def pre_call(self, input, api_key, model=None, additional_args={}): # noqa: PLR try: # [Non-blocking Extra Debug Information in metadata] if turn_off_message_logging is True: - _metadata["raw_request"] = ( - "redacted by litellm. \ + _metadata[ + "raw_request" + ] = "redacted by litellm. 
\ 'litellm.turn_off_message_logging=True'" - ) else: curl_command = self._get_request_curl_command( api_base=additional_args.get("api_base", ""), @@ -784,32 +783,32 @@ def pre_call(self, input, api_key, model=None, additional_args={}): # noqa: PLR _metadata["raw_request"] = str(curl_command) # split up, so it's easier to parse in the UI - self.model_call_details["raw_request_typed_dict"] = ( - RawRequestTypedDict( - raw_request_api_base=str( - additional_args.get("api_base") or "" - ), - raw_request_body=self._get_raw_request_body( - additional_args.get("complete_input_dict", {}) - ), - raw_request_headers=self._get_masked_headers( - additional_args.get("headers", {}) or {}, - ignore_sensitive_headers=True, - ), - error=None, - ) + self.model_call_details[ + "raw_request_typed_dict" + ] = RawRequestTypedDict( + raw_request_api_base=str( + additional_args.get("api_base") or "" + ), + raw_request_body=self._get_raw_request_body( + additional_args.get("complete_input_dict", {}) + ), + raw_request_headers=self._get_masked_headers( + additional_args.get("headers", {}) or {}, + ignore_sensitive_headers=True, + ), + error=None, ) except Exception as e: - self.model_call_details["raw_request_typed_dict"] = ( - RawRequestTypedDict( - error=str(e), - ) + self.model_call_details[ + "raw_request_typed_dict" + ] = RawRequestTypedDict( + error=str(e), ) - _metadata["raw_request"] = ( - "Unable to Log \ + _metadata[ + "raw_request" + ] = "Unable to Log \ raw request: {}".format( - str(e) - ) + str(e) ) if getattr(self, "logger_fn", None) and callable(self.logger_fn): try: @@ -1092,13 +1091,13 @@ async def async_post_mcp_tool_call_hook( for callback in callbacks: try: if isinstance(callback, CustomLogger): - response: Optional[MCPPostCallResponseObject] = ( - await callback.async_post_mcp_tool_call_hook( - kwargs=kwargs, - response_obj=post_mcp_tool_call_response_obj, - start_time=start_time, - end_time=end_time, - ) + response: Optional[ + MCPPostCallResponseObject + ] = await callback.async_post_mcp_tool_call_hook( + kwargs=kwargs, + response_obj=post_mcp_tool_call_response_obj, + start_time=start_time, + end_time=end_time, ) ###################################################################### # if any of the callbacks modify the response, use the modified response @@ -1218,9 +1217,9 @@ def _response_cost_calculator( verbose_logger.debug( f"response_cost_failure_debug_information: {debug_info}" ) - self.model_call_details["response_cost_failure_debug_information"] = ( - debug_info - ) + self.model_call_details[ + "response_cost_failure_debug_information" + ] = debug_info return None try: @@ -1245,9 +1244,9 @@ def _response_cost_calculator( verbose_logger.debug( f"response_cost_failure_debug_information: {debug_info}" ) - self.model_call_details["response_cost_failure_debug_information"] = ( - debug_info - ) + self.model_call_details[ + "response_cost_failure_debug_information" + ] = debug_info return None @@ -1391,9 +1390,9 @@ def _success_handler_helper_fn( end_time = datetime.datetime.now() if self.completion_start_time is None: self.completion_start_time = end_time - self.model_call_details["completion_start_time"] = ( - self.completion_start_time - ) + self.model_call_details[ + "completion_start_time" + ] = self.completion_start_time self.model_call_details["log_event_type"] = "successful_api_call" self.model_call_details["end_time"] = end_time self.model_call_details["cache_hit"] = cache_hit @@ -1446,39 +1445,39 @@ def _success_handler_helper_fn( "response_cost" ] else: - 
self.model_call_details["response_cost"] = ( - self._response_cost_calculator(result=logging_result) - ) + self.model_call_details[ + "response_cost" + ] = self._response_cost_calculator(result=logging_result) ## STANDARDIZED LOGGING PAYLOAD - self.model_call_details["standard_logging_object"] = ( - get_standard_logging_object_payload( - kwargs=self.model_call_details, - init_response_obj=logging_result, - start_time=start_time, - end_time=end_time, - logging_obj=self, - status="success", - standard_built_in_tools_params=self.standard_built_in_tools_params, - ) + self.model_call_details[ + "standard_logging_object" + ] = get_standard_logging_object_payload( + kwargs=self.model_call_details, + init_response_obj=logging_result, + start_time=start_time, + end_time=end_time, + logging_obj=self, + status="success", + standard_built_in_tools_params=self.standard_built_in_tools_params, ) elif isinstance(result, dict) or isinstance(result, list): ## STANDARDIZED LOGGING PAYLOAD - self.model_call_details["standard_logging_object"] = ( - get_standard_logging_object_payload( - kwargs=self.model_call_details, - init_response_obj=result, - start_time=start_time, - end_time=end_time, - logging_obj=self, - status="success", - standard_built_in_tools_params=self.standard_built_in_tools_params, - ) + self.model_call_details[ + "standard_logging_object" + ] = get_standard_logging_object_payload( + kwargs=self.model_call_details, + init_response_obj=result, + start_time=start_time, + end_time=end_time, + logging_obj=self, + status="success", + standard_built_in_tools_params=self.standard_built_in_tools_params, ) elif standard_logging_object is not None: - self.model_call_details["standard_logging_object"] = ( - standard_logging_object - ) + self.model_call_details[ + "standard_logging_object" + ] = standard_logging_object else: # streaming chunks + image gen. 
self.model_call_details["response_cost"] = None @@ -1577,7 +1576,6 @@ def flush_passthrough_collected_chunks( ) if complete_streaming_response is not None: - self.success_handler(result=complete_streaming_response) return @@ -1630,23 +1628,23 @@ def success_handler( # noqa: PLR0915 verbose_logger.debug( "Logging Details LiteLLM-Success Call streaming complete" ) - self.model_call_details["complete_streaming_response"] = ( - complete_streaming_response - ) - self.model_call_details["response_cost"] = ( - self._response_cost_calculator(result=complete_streaming_response) - ) + self.model_call_details[ + "complete_streaming_response" + ] = complete_streaming_response + self.model_call_details[ + "response_cost" + ] = self._response_cost_calculator(result=complete_streaming_response) ## STANDARDIZED LOGGING PAYLOAD - self.model_call_details["standard_logging_object"] = ( - get_standard_logging_object_payload( - kwargs=self.model_call_details, - init_response_obj=complete_streaming_response, - start_time=start_time, - end_time=end_time, - logging_obj=self, - status="success", - standard_built_in_tools_params=self.standard_built_in_tools_params, - ) + self.model_call_details[ + "standard_logging_object" + ] = get_standard_logging_object_payload( + kwargs=self.model_call_details, + init_response_obj=complete_streaming_response, + start_time=start_time, + end_time=end_time, + logging_obj=self, + status="success", + standard_built_in_tools_params=self.standard_built_in_tools_params, ) callbacks = self.get_combined_callback_list( dynamic_success_callbacks=self.dynamic_success_callbacks, @@ -1970,10 +1968,10 @@ def success_handler( # noqa: PLR0915 ) else: if self.stream and complete_streaming_response: - self.model_call_details["complete_response"] = ( - self.model_call_details.get( - "complete_streaming_response", {} - ) + self.model_call_details[ + "complete_response" + ] = self.model_call_details.get( + "complete_streaming_response", {} ) result = self.model_call_details["complete_response"] openMeterLogger.log_success_event( @@ -2012,10 +2010,10 @@ def success_handler( # noqa: PLR0915 ) else: if self.stream and complete_streaming_response: - self.model_call_details["complete_response"] = ( - self.model_call_details.get( - "complete_streaming_response", {} - ) + self.model_call_details[ + "complete_response" + ] = self.model_call_details.get( + "complete_streaming_response", {} ) result = self.model_call_details["complete_response"] @@ -2117,10 +2115,12 @@ async def async_success_handler( # noqa: PLR0915 result.usage = batch_usage elif not is_base64_unified_file_id: # only run for non-unified file ids - response_cost, batch_usage, batch_models = ( - await _handle_completed_batch( - batch=result, custom_llm_provider=self.custom_llm_provider - ) + ( + response_cost, + batch_usage, + batch_models, + ) = await _handle_completed_batch( + batch=result, custom_llm_provider=self.custom_llm_provider ) result._hidden_params["response_cost"] = response_cost @@ -2151,9 +2151,9 @@ async def async_success_handler( # noqa: PLR0915 if complete_streaming_response is not None: print_verbose("Async success callbacks: Got a complete streaming response") - self.model_call_details["async_complete_streaming_response"] = ( - complete_streaming_response - ) + self.model_call_details[ + "async_complete_streaming_response" + ] = complete_streaming_response try: if self.model_call_details.get("cache_hit", False) is True: @@ -2164,10 +2164,10 @@ async def async_success_handler( # noqa: PLR0915 
model_call_details=self.model_call_details ) # base_model defaults to None if not set on model_info - self.model_call_details["response_cost"] = ( - self._response_cost_calculator( - result=complete_streaming_response - ) + self.model_call_details[ + "response_cost" + ] = self._response_cost_calculator( + result=complete_streaming_response ) verbose_logger.debug( @@ -2180,16 +2180,16 @@ async def async_success_handler( # noqa: PLR0915 self.model_call_details["response_cost"] = None ## STANDARDIZED LOGGING PAYLOAD - self.model_call_details["standard_logging_object"] = ( - get_standard_logging_object_payload( - kwargs=self.model_call_details, - init_response_obj=complete_streaming_response, - start_time=start_time, - end_time=end_time, - logging_obj=self, - status="success", - standard_built_in_tools_params=self.standard_built_in_tools_params, - ) + self.model_call_details[ + "standard_logging_object" + ] = get_standard_logging_object_payload( + kwargs=self.model_call_details, + init_response_obj=complete_streaming_response, + start_time=start_time, + end_time=end_time, + logging_obj=self, + status="success", + standard_built_in_tools_params=self.standard_built_in_tools_params, ) callbacks = self.get_combined_callback_list( dynamic_success_callbacks=self.dynamic_async_success_callbacks, @@ -2402,18 +2402,18 @@ def _failure_handler_helper_fn( ## STANDARDIZED LOGGING PAYLOAD - self.model_call_details["standard_logging_object"] = ( - get_standard_logging_object_payload( - kwargs=self.model_call_details, - init_response_obj={}, - start_time=start_time, - end_time=end_time, - logging_obj=self, - status="failure", - error_str=str(exception), - original_exception=exception, - standard_built_in_tools_params=self.standard_built_in_tools_params, - ) + self.model_call_details[ + "standard_logging_object" + ] = get_standard_logging_object_payload( + kwargs=self.model_call_details, + init_response_obj={}, + start_time=start_time, + end_time=end_time, + logging_obj=self, + status="failure", + error_str=str(exception), + original_exception=exception, + standard_built_in_tools_params=self.standard_built_in_tools_params, ) return start_time, end_time @@ -3302,9 +3302,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915 endpoint=arize_config.endpoint, ) - os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = ( - f"space_id={arize_config.space_key},api_key={arize_config.api_key}" - ) + os.environ[ + "OTEL_EXPORTER_OTLP_TRACES_HEADERS" + ] = f"space_id={arize_config.space_key},api_key={arize_config.api_key}" for callback in _in_memory_loggers: if ( isinstance(callback, ArizeLogger) @@ -3328,9 +3328,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915 # auth can be disabled on local deployments of arize phoenix if arize_phoenix_config.otlp_auth_headers is not None: - os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = ( - arize_phoenix_config.otlp_auth_headers - ) + os.environ[ + "OTEL_EXPORTER_OTLP_TRACES_HEADERS" + ] = arize_phoenix_config.otlp_auth_headers for callback in _in_memory_loggers: if ( @@ -3367,6 +3367,7 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915 return galileo_logger # type: ignore elif logging_integration == "cloudzero": from litellm.integrations.cloudzero.cloudzero import CloudZeroLogger + for callback in _in_memory_loggers: if isinstance(callback, CloudZeroLogger): return callback # type: ignore @@ -3437,9 +3438,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915 exporter="otlp_http", endpoint="https://langtrace.ai/api/trace", ) - 
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = ( - f"api_key={os.getenv('LANGTRACE_API_KEY')}" - ) + os.environ[ + "OTEL_EXPORTER_OTLP_TRACES_HEADERS" + ] = f"api_key={os.getenv('LANGTRACE_API_KEY')}" for callback in _in_memory_loggers: if ( isinstance(callback, OpenTelemetry) @@ -3594,6 +3595,7 @@ def get_custom_logger_compatible_class( # noqa: PLR0915 return callback elif logging_integration == "cloudzero": from litellm.integrations.cloudzero.cloudzero import CloudZeroLogger + for callback in _in_memory_loggers: if isinstance(callback, CloudZeroLogger): return callback @@ -4088,10 +4090,10 @@ def get_hidden_params( for key in StandardLoggingHiddenParams.__annotations__.keys(): if key in hidden_params: if key == "additional_headers": - clean_hidden_params["additional_headers"] = ( - StandardLoggingPayloadSetup.get_additional_headers( - hidden_params[key] - ) + clean_hidden_params[ + "additional_headers" + ] = StandardLoggingPayloadSetup.get_additional_headers( + hidden_params[key] ) else: clean_hidden_params[key] = hidden_params[key] # type: ignore @@ -4504,7 +4506,7 @@ def get_standard_logging_object_payload( def emit_standard_logging_payload(payload: StandardLoggingPayload): if os.getenv("LITELLM_PRINT_STANDARD_LOGGING_PAYLOAD"): - print(json.dumps(payload, indent=4)) # noqa + print(json.dumps(payload, indent=4)) # noqa def get_standard_logging_metadata( @@ -4527,6 +4529,9 @@ def get_standard_logging_metadata( clean_metadata = StandardLoggingMetadata( user_api_key_hash=None, user_api_key_alias=None, + user_api_key_spend=None, + user_api_key_max_budget=None, + user_api_key_budget_reset_at=None, user_api_key_team_id=None, user_api_key_org_id=None, user_api_key_user_id=None, @@ -4576,9 +4581,9 @@ def scrub_sensitive_keys_in_metadata(litellm_params: Optional[dict]): ): for k, v in metadata["user_api_key_metadata"].items(): if k == "logging": # prevent logging user logging keys - cleaned_user_api_key_metadata[k] = ( - "scrubbed_by_litellm_for_sensitive_keys" - ) + cleaned_user_api_key_metadata[ + k + ] = "scrubbed_by_litellm_for_sensitive_keys" else: cleaned_user_api_key_metadata[k] = v diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index 020cd3e8cde2..4ff0eeb1fb6f 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -171,12 +171,12 @@ def _get_dynamic_logging_metadata( user_api_key_dict: UserAPIKeyAuth, proxy_config: ProxyConfig ) -> Optional[TeamCallbackMetadata]: callback_settings_obj: Optional[TeamCallbackMetadata] = None - key_dynamic_logging_settings: Optional[dict] = ( - KeyAndTeamLoggingSettings.get_key_dynamic_logging_settings(user_api_key_dict) - ) - team_dynamic_logging_settings: Optional[dict] = ( - KeyAndTeamLoggingSettings.get_team_dynamic_logging_settings(user_api_key_dict) - ) + key_dynamic_logging_settings: Optional[ + dict + ] = KeyAndTeamLoggingSettings.get_key_dynamic_logging_settings(user_api_key_dict) + team_dynamic_logging_settings: Optional[ + dict + ] = KeyAndTeamLoggingSettings.get_team_dynamic_logging_settings(user_api_key_dict) ######################################################################################### # Key-based callbacks ######################################################################################### @@ -272,7 +272,7 @@ def _get_timeout_from_request(headers: dict) -> Optional[float]: if timeout_header is not None: return float(timeout_header) return None - + @staticmethod def _get_stream_timeout_from_request(headers: dict) -> 
Optional[float]: """ @@ -292,13 +292,14 @@ def _get_num_retries_from_request(headers: dict) -> Optional[int]: if num_retries_header is not None: return int(num_retries_header) return None - + @staticmethod def _get_spend_logs_metadata_from_request_headers(headers: dict) -> Optional[dict]: """ Get the `spend_logs_metadata` from the request headers. """ from litellm.litellm_core_utils.safe_json_loads import safe_json_loads + spend_logs_metadata_header = headers.get("x-litellm-spend-logs-metadata", None) if spend_logs_metadata_header is not None: return safe_json_loads(spend_logs_metadata_header) @@ -337,16 +338,24 @@ def _get_case_insensitive_header(headers: dict, key: str) -> Optional[str]: return None @staticmethod - def add_internal_user_from_user_mapping(general_settings: Optional[Dict], user_api_key_dict: UserAPIKeyAuth, headers: dict) -> UserAPIKeyAuth: + def add_internal_user_from_user_mapping( + general_settings: Optional[Dict], + user_api_key_dict: UserAPIKeyAuth, + headers: dict, + ) -> UserAPIKeyAuth: if general_settings is None: return user_api_key_dict user_header_mapping = general_settings.get("user_header_mappings") if not user_header_mapping: return user_api_key_dict - header_name = LiteLLMProxyRequestSetup.get_internal_user_header_from_mapping(user_header_mapping) + header_name = LiteLLMProxyRequestSetup.get_internal_user_header_from_mapping( + user_header_mapping + ) if not header_name: return user_api_key_dict - header_value = LiteLLMProxyRequestSetup._get_case_insensitive_header(headers, header_name) + header_value = LiteLLMProxyRequestSetup._get_case_insensitive_header( + headers, header_name + ) if header_value: user_api_key_dict.user_id = header_value return user_api_key_dict @@ -497,8 +506,10 @@ def add_litellm_data_for_backend_llm_call( timeout = LiteLLMProxyRequestSetup._get_timeout_from_request(headers) if timeout is not None: data["timeout"] = timeout - - stream_timeout = LiteLLMProxyRequestSetup._get_stream_timeout_from_request(headers) + + stream_timeout = LiteLLMProxyRequestSetup._get_stream_timeout_from_request( + headers + ) if stream_timeout is not None: data["stream_timeout"] = stream_timeout @@ -507,7 +518,7 @@ def add_litellm_data_for_backend_llm_call( data["num_retries"] = num_retries return data - + @staticmethod def add_litellm_metadata_from_request_headers( headers: dict, @@ -520,11 +531,16 @@ def add_litellm_metadata_from_request_headers( Relevant issue: https://github.com/BerriAI/litellm/issues/14008 """ from litellm.proxy._types import LitellmMetadataFromRequestHeaders + metadata_from_headers = LitellmMetadataFromRequestHeaders() - spend_logs_metadata = LiteLLMProxyRequestSetup._get_spend_logs_metadata_from_request_headers(headers) + spend_logs_metadata = ( + LiteLLMProxyRequestSetup._get_spend_logs_metadata_from_request_headers( + headers + ) + ) if spend_logs_metadata is not None: metadata_from_headers["spend_logs_metadata"] = spend_logs_metadata - + ######################################################################################### # Finally update the requests metadata with the `metadata_from_headers` ######################################################################################### @@ -539,6 +555,8 @@ def get_sanitized_user_information_from_key( user_api_key_logged_metadata = StandardLoggingUserAPIKeyMetadata( user_api_key_hash=user_api_key_dict.api_key, # just the hashed token user_api_key_alias=user_api_key_dict.key_alias, + user_api_key_spend=user_api_key_dict.spend, + user_api_key_max_budget=user_api_key_dict.max_budget, 
user_api_key_team_id=user_api_key_dict.team_id, user_api_key_user_id=user_api_key_dict.user_id, user_api_key_org_id=user_api_key_dict.org_id, @@ -589,11 +607,11 @@ def add_key_level_controls( ## KEY-LEVEL SPEND LOGS / TAGS if "tags" in key_metadata and key_metadata["tags"] is not None: - data[_metadata_variable_name]["tags"] = ( - LiteLLMProxyRequestSetup._merge_tags( - request_tags=data[_metadata_variable_name].get("tags"), - tags_to_add=key_metadata["tags"], - ) + data[_metadata_variable_name][ + "tags" + ] = LiteLLMProxyRequestSetup._merge_tags( + request_tags=data[_metadata_variable_name].get("tags"), + tags_to_add=key_metadata["tags"], ) if "spend_logs_metadata" in key_metadata and isinstance( key_metadata["spend_logs_metadata"], dict @@ -715,7 +733,6 @@ async def add_litellm_data_to_request( # noqa: PLR0915 from litellm.proxy.proxy_server import llm_router, premium_user from litellm.types.proxy.litellm_pre_call_utils import SecretFields - _headers = clean_headers( request.headers, litellm_key_header_name=( @@ -741,8 +758,6 @@ async def add_litellm_data_to_request( # noqa: PLR0915 if data.get(_metadata_variable_name, None) is None: data[_metadata_variable_name] = {} - - data.update( LiteLLMProxyRequestSetup.add_litellm_data_for_backend_llm_call( headers=_headers, @@ -764,7 +779,9 @@ async def add_litellm_data_to_request( # noqa: PLR0915 data=data, headers=_headers, user_api_key_dict=user_api_key_dict ) - user_api_key_dict = LiteLLMProxyRequestSetup.add_internal_user_from_user_mapping(general_settings, user_api_key_dict, _headers) + user_api_key_dict = LiteLLMProxyRequestSetup.add_internal_user_from_user_mapping( + general_settings, user_api_key_dict, _headers + ) # Parse user info from headers user = LiteLLMProxyRequestSetup.get_user_from_headers(_headers, general_settings) @@ -774,7 +791,6 @@ async def add_litellm_data_to_request( # noqa: PLR0915 if "user" not in data: data["user"] = user - data["secret_fields"] = SecretFields(raw_headers=dict(request.headers)) ## Dynamic api version (Azure OpenAI endpoints) ## @@ -824,9 +840,9 @@ async def add_litellm_data_to_request( # noqa: PLR0915 data[_metadata_variable_name]["litellm_api_version"] = version if general_settings is not None: - data[_metadata_variable_name]["global_max_parallel_requests"] = ( - general_settings.get("global_max_parallel_requests", None) - ) + data[_metadata_variable_name][ + "global_max_parallel_requests" + ] = general_settings.get("global_max_parallel_requests", None) ### KEY-LEVEL Controls key_metadata = user_api_key_dict.metadata diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py index 6e5d4faabdbc..f16bae559bb1 100644 --- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py @@ -474,6 +474,9 @@ def _init_kwargs_for_pass_through_endpoint( user_api_key_team_alias=user_api_key_dict.team_alias, user_api_key_end_user_id=user_api_key_dict.end_user_id, user_api_key_request_route=user_api_key_dict.request_route, + user_api_key_spend=user_api_key_dict.spend, + user_api_key_max_budget=user_api_key_dict.max_budget, + user_api_key_budget_reset_at=user_api_key_dict.budget_reset_at, ) ) @@ -1003,7 +1006,7 @@ def add_exact_path_route( ): """Add exact path route for pass-through endpoint""" route_key = f"{endpoint_id}:exact:{path}" - + # Check if this exact route is already registered if route_key in _registered_pass_through_routes: verbose_proxy_logger.debug( 
@@ -1011,7 +1014,7 @@ def add_exact_path_route( path, ) return - + verbose_proxy_logger.debug( "adding exact pass through endpoint: %s, dependencies: %s", path, @@ -1032,12 +1035,12 @@ def add_exact_path_route( methods=["GET", "POST", "PUT", "DELETE", "PATCH"], dependencies=dependencies, ) - + # Register the route to prevent duplicates _registered_pass_through_routes[route_key] = { "endpoint_id": endpoint_id, "path": path, - "type": "exact" + "type": "exact", } @staticmethod @@ -1055,7 +1058,7 @@ def add_subpath_route( """Add wildcard route for sub-paths""" wildcard_path = f"{path}/{{subpath:path}}" route_key = f"{endpoint_id}:subpath:{path}" - + # Check if this subpath route is already registered if route_key in _registered_pass_through_routes: verbose_proxy_logger.debug( @@ -1063,7 +1066,7 @@ def add_subpath_route( wildcard_path, ) return - + verbose_proxy_logger.debug( "adding wildcard pass through endpoint: %s, dependencies: %s", wildcard_path, @@ -1085,19 +1088,20 @@ def add_subpath_route( methods=["GET", "POST", "PUT", "DELETE", "PATCH"], dependencies=dependencies, ) - + # Register the route to prevent duplicates _registered_pass_through_routes[route_key] = { "endpoint_id": endpoint_id, "path": path, - "type": "subpath" + "type": "subpath", } @staticmethod def remove_endpoint_routes(endpoint_id: str): """Remove all routes for a specific endpoint ID from the registry""" keys_to_remove = [ - key for key, value in _registered_pass_through_routes.items() + key + for key, value in _registered_pass_through_routes.items() if value["endpoint_id"] == endpoint_id ] for key in keys_to_remove: @@ -1480,7 +1484,7 @@ async def delete_pass_through_endpoints( pass_through_endpoint_data.pop(endpoint_index) response_obj = found_endpoint - # Remove routes from registry + # Remove routes from registry InitPassThroughEndpointHelpers.remove_endpoint_routes(endpoint_id) ## Update db diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 0abce2bcd2dc..21f73e0ce5eb 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -162,7 +162,9 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False): SearchContextCostPerQuery ] # Cost for using web search tool citation_cost_per_token: Optional[float] # Cost per citation token for Perplexity - tiered_pricing: Optional[List[Dict[str, Any]]] # Tiered pricing structure for models like Dashscope + tiered_pricing: Optional[ + List[Dict[str, Any]] + ] # Tiered pricing structure for models like Dashscope litellm_provider: Required[str] mode: Required[ Literal[ @@ -1808,8 +1810,8 @@ class StandardLoggingUserAPIKeyMetadata(TypedDict): user_api_key_hash: Optional[str] # hash of the litellm virtual key used user_api_key_alias: Optional[str] user_api_key_spend: Optional[float] - user_api_key_max_budget: Optional[float] = None - user_api_key_budget_reset_at: Optional[str] = None + user_api_key_max_budget: Optional[float] + user_api_key_budget_reset_at: Optional[str] user_api_key_org_id: Optional[str] user_api_key_team_id: Optional[str] user_api_key_user_id: Optional[str] @@ -1999,7 +2001,7 @@ class StandardLoggingGuardrailInformation(TypedDict, total=False): ] guardrail_request: Optional[dict] guardrail_response: Optional[Union[dict, str, List[dict]]] - guardrail_status: Literal["success", "failure","blocked"] + guardrail_status: Literal["success", "failure", "blocked"] start_time: Optional[float] end_time: Optional[float] duration: Optional[float]
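
The TypedDict hunk above drops the "= None" defaults because TypedDict fields do not support right-hand-side default values (type checkers such as mypy reject them); optionality is carried by the Optional[...] annotation alone. Below is a minimal sketch, with a stand-in key object, of how the new budget fields travel from the authenticated key into logging metadata. KeyBudgetMetadata and FakeKey are illustrative names for this sketch only, not litellm types.

# Sketch: budget fields flowing from an authenticated key into logging metadata.
from datetime import datetime, timezone
from typing import Optional, TypedDict


class KeyBudgetMetadata(TypedDict):
    # TypedDict fields cannot carry "= None" defaults; Optional[...] alone
    # expresses that the value may be absent at runtime.
    user_api_key_spend: Optional[float]
    user_api_key_max_budget: Optional[float]
    user_api_key_budget_reset_at: Optional[str]


class FakeKey:
    # Stand-in for the authenticated key object used in the diff
    # (spend, max_budget, budget_reset_at attributes).
    spend = 2.5
    max_budget = 10.0
    budget_reset_at = datetime(2025, 9, 14, tzinfo=timezone.utc)


def budget_metadata_from_key(key: FakeKey) -> KeyBudgetMetadata:
    # Mirror the pattern in the diff: copy spend/budget info off the key,
    # serialising the reset timestamp to an ISO string when present.
    return KeyBudgetMetadata(
        user_api_key_spend=key.spend,
        user_api_key_max_budget=key.max_budget,
        user_api_key_budget_reset_at=(
            key.budget_reset_at.isoformat() if key.budget_reset_at else None
        ),
    )


print(budget_metadata_from_key(FakeKey()))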
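
Most of the remaining hunks in litellm_logging.py and litellm_pre_call_utils.py are line-length restyling rather than behavior changes: a parenthesised right-hand side is replaced by splitting the subscript across lines. Both forms below produce the same dictionary state; the dict and value are placeholders for illustration.

details: dict = {}

# old style: value wrapped in parentheses
details["response_cost"] = (
    0.15
)

# new style: subscript split across lines, same assignment
details[
    "response_cost"
] = 0.15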
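
The pre-call changes read per-request controls out of HTTP headers; the x-litellm-spend-logs-metadata header in particular is JSON-decoded and merged into the request metadata. The following is a self-contained sketch under those assumptions, using inline json.loads in place of litellm's safe_json_loads helper; the function names are hypothetical.

import json
from typing import Any, Dict, Optional


def spend_logs_metadata_from_headers(headers: Dict[str, str]) -> Optional[dict]:
    # Decode the spend-logs metadata header if present and well-formed JSON.
    raw = headers.get("x-litellm-spend-logs-metadata")
    if raw is None:
        return None
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) else None


def merge_into_request_metadata(data: Dict[str, Any], headers: Dict[str, str]) -> None:
    # Attach the decoded metadata under the request's metadata key, as the
    # diff does for spend_logs_metadata.
    metadata = spend_logs_metadata_from_headers(headers)
    if metadata is not None:
        data.setdefault("metadata", {})["spend_logs_metadata"] = metadata


request_body: Dict[str, Any] = {"model": "gpt-4"}
merge_into_request_metadata(
    request_body,
    {"x-litellm-spend-logs-metadata": '{"project": "demo"}'},
)
print(request_body)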
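
The pass-through endpoint changes add a module-level registry so exact and subpath routes are mounted at most once per endpoint and can be dropped when an endpoint is deleted. Below is a minimal sketch of that bookkeeping without the FastAPI wiring; register_route is an illustrative name, and the real code registers the API route on the router before recording it.

from typing import Dict

# Registry keyed by "{endpoint_id}:{kind}:{path}", matching the route_key
# format used in the diff ("exact" or "subpath" as the kind).
_registered_pass_through_routes: Dict[str, dict] = {}


def register_route(endpoint_id: str, path: str, kind: str) -> bool:
    """Return True if newly registered, False if the route already exists."""
    route_key = f"{endpoint_id}:{kind}:{path}"
    if route_key in _registered_pass_through_routes:
        return False  # already mounted; skip adding a duplicate route
    _registered_pass_through_routes[route_key] = {
        "endpoint_id": endpoint_id,
        "path": path,
        "type": kind,
    }
    return True


def remove_endpoint_routes(endpoint_id: str) -> None:
    """Drop every registry entry owned by one endpoint (used on delete)."""
    keys_to_remove = [
        key
        for key, value in _registered_pass_through_routes.items()
        if value["endpoint_id"] == endpoint_id
    ]
    for key in keys_to_remove:
        del _registered_pass_through_routes[key]


assert register_route("ep-1", "/v1/foo", "exact") is True
assert register_route("ep-1", "/v1/foo", "exact") is False
remove_endpoint_routes("ep-1")
assert register_route("ep-1", "/v1/foo", "exact") is True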