From e3c4593b5078129a7882a68fdfa10b4996a11a5b Mon Sep 17 00:00:00 2001
From: Hannah Stepanek
Date: Wed, 13 Dec 2023 12:47:51 -0800
Subject: [PATCH] Capture _usage_based headers in v0

---
 newrelic/hooks/mlmodel_openai.py             | 50 ++++++++------------
 tests/mlmodel_openai/test_chat_completion.py |  6 +++
 2 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py
index db58d00e3a..4ba96772b7 100644
--- a/newrelic/hooks/mlmodel_openai.py
+++ b/newrelic/hooks/mlmodel_openai.py
@@ -28,6 +28,7 @@
 OPENAI_VERSION_TUPLE = tuple(map(int, OPENAI_VERSION.split(".")))
 OPENAI_V1 = OPENAI_VERSION_TUPLE >= (1,)
 
+
 def wrap_embedding_sync(wrapped, instance, args, kwargs):
     transaction = current_transaction()
     if not transaction or kwargs.get("stream", False):
@@ -104,7 +105,6 @@ def wrap_embedding_sync(wrapped, instance, args, kwargs):
     else:
         attribute_response = response
 
-
     request_id = response_headers.get("x-request-id", "") if response_headers else ""
     response_model = attribute_response.get("model", "")
 
@@ -125,7 +125,7 @@ def wrap_embedding_sync(wrapped, instance, args, kwargs):
         "duration": ft.duration,
         "response.model": response_model,
         "response.organization": organization,
-        "response.api_type": api_type, # API type was removed in v1
+        "response.api_type": api_type,  # API type was removed in v1
         "response.usage.total_tokens": response_usage.get("total_tokens", "") if any(response_usage) else "",
         "response.usage.prompt_tokens": response_usage.get("prompt_tokens", "") if any(response_usage) else "",
         "response.headers.llmVersion": response_headers.get("openai-version", ""),
@@ -327,22 +327,17 @@ def wrap_chat_completion_sync(wrapped, instance, args, kwargs):
         "response.headers.ratelimitRemainingRequests": check_rate_limit_header(
             response_headers, "x-ratelimit-remaining-requests", True
         ),
+        "response.headers.ratelimitLimitTokensUsageBased": check_rate_limit_header(
+            response_headers, "x-ratelimit-limit-tokens_usage_based", True
+        ),
+        "response.headers.ratelimitResetTokensUsageBased": check_rate_limit_header(
+            response_headers, "x-ratelimit-reset-tokens_usage_based", False
+        ),
+        "response.headers.ratelimitRemainingTokensUsageBased": check_rate_limit_header(
+            response_headers, "x-ratelimit-remaining-tokens_usage_based", True
+        ),
         "response.number_of_messages": len(messages) + len(choices),
     }
-    if OPENAI_V1:
-        full_chat_completion_summary_dict.update(
-            {
-                "response.headers.ratelimitLimitTokensUsageBased": check_rate_limit_header(
-                    response_headers, "x-ratelimit-limit-tokens_usage_based", True
-                ),
-                "response.headers.ratelimitResetTokensUsageBased": check_rate_limit_header(
-                    response_headers, "x-ratelimit-reset-tokens_usage_based", False
-                ),
-                "response.headers.ratelimitRemainingTokensUsageBased": check_rate_limit_header(
-                    response_headers, "x-ratelimit-remaining-tokens_usage_based", True
-                ),
-            }
-        )
 
     transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict)
 
@@ -772,22 +767,17 @@ async def wrap_chat_completion_async(wrapped, instance, args, kwargs):
         "response.headers.ratelimitRemainingRequests": check_rate_limit_header(
             response_headers, "x-ratelimit-remaining-requests", True
         ),
+        "response.headers.ratelimitLimitTokensUsageBased": check_rate_limit_header(
+            response_headers, "x-ratelimit-limit-tokens_usage_based", True
+        ),
+        "response.headers.ratelimitResetTokensUsageBased": check_rate_limit_header(
+            response_headers, "x-ratelimit-reset-tokens_usage_based", False
+        ),
+        "response.headers.ratelimitRemainingTokensUsageBased": check_rate_limit_header(
+            response_headers, "x-ratelimit-remaining-tokens_usage_based", True
+        ),
         "response.number_of_messages": len(messages) + len(choices),
     }
-    if OPENAI_V1:
-        full_chat_completion_summary_dict.update(
-            {
-                "response.headers.ratelimitLimitTokensUsageBased": check_rate_limit_header(
-                    response_headers, "x-ratelimit-limit-tokens_usage_based", True
-                ),
-                "response.headers.ratelimitResetTokensUsageBased": check_rate_limit_header(
-                    response_headers, "x-ratelimit-reset-tokens_usage_based", False
-                ),
-                "response.headers.ratelimitRemainingTokensUsageBased": check_rate_limit_header(
-                    response_headers, "x-ratelimit-remaining-tokens_usage_based", True
-                ),
-            }
-        )
 
     transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict)
 
diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py
index 4e582f4638..f2c31b2628 100644
--- a/tests/mlmodel_openai/test_chat_completion.py
+++ b/tests/mlmodel_openai/test_chat_completion.py
@@ -63,6 +63,9 @@
             "response.headers.ratelimitResetRequests": "7m12s",
             "response.headers.ratelimitRemainingTokens": 39940,
             "response.headers.ratelimitRemainingRequests": 199,
+            "response.headers.ratelimitLimitTokensUsageBased": "",
+            "response.headers.ratelimitResetTokensUsageBased": "",
+            "response.headers.ratelimitRemainingTokensUsageBased": "",
             "vendor": "openAI",
             "ingest_source": "Python",
             "response.number_of_messages": 3,
@@ -179,6 +182,9 @@ def test_openai_chat_completion_sync_in_txn_with_convo_id(set_trace_info):
             "response.headers.ratelimitResetRequests": "7m12s",
             "response.headers.ratelimitRemainingTokens": 39940,
             "response.headers.ratelimitRemainingRequests": 199,
+            "response.headers.ratelimitLimitTokensUsageBased": "",
+            "response.headers.ratelimitResetTokensUsageBased": "",
+            "response.headers.ratelimitRemainingTokensUsageBased": "",
             "vendor": "openAI",
             "ingest_source": "Python",
             "response.number_of_messages": 3,