Capture _usage_based headers in v0
hmstepanek committed Dec 13, 2023
1 parent f8b3703 commit e3c4593
Showing 2 changed files with 26 additions and 30 deletions.
50 changes: 20 additions & 30 deletions newrelic/hooks/mlmodel_openai.py
@@ -28,6 +28,7 @@
 OPENAI_VERSION_TUPLE = tuple(map(int, OPENAI_VERSION.split(".")))
 OPENAI_V1 = OPENAI_VERSION_TUPLE >= (1,)
 
+
 def wrap_embedding_sync(wrapped, instance, args, kwargs):
     transaction = current_transaction()
     if not transaction or kwargs.get("stream", False):
@@ -104,7 +105,6 @@ def wrap_embedding_sync(wrapped, instance, args, kwargs):
     else:
         attribute_response = response
 
-
     request_id = response_headers.get("x-request-id", "") if response_headers else ""
 
     response_model = attribute_response.get("model", "")
@@ -125,7 +125,7 @@ def wrap_embedding_sync(wrapped, instance, args, kwargs):
         "duration": ft.duration,
         "response.model": response_model,
         "response.organization": organization,
-        "response.api_type": api_type, # API type was removed in v1
+        "response.api_type": api_type,  # API type was removed in v1
         "response.usage.total_tokens": response_usage.get("total_tokens", "") if any(response_usage) else "",
         "response.usage.prompt_tokens": response_usage.get("prompt_tokens", "") if any(response_usage) else "",
         "response.headers.llmVersion": response_headers.get("openai-version", ""),
@@ -327,22 +327,17 @@ def wrap_chat_completion_sync(wrapped, instance, args, kwargs):
         "response.headers.ratelimitRemainingRequests": check_rate_limit_header(
             response_headers, "x-ratelimit-remaining-requests", True
         ),
+        "response.headers.ratelimitLimitTokensUsageBased": check_rate_limit_header(
+            response_headers, "x-ratelimit-limit-tokens_usage_based", True
+        ),
+        "response.headers.ratelimitResetTokensUsageBased": check_rate_limit_header(
+            response_headers, "x-ratelimit-reset-tokens_usage_based", False
+        ),
+        "response.headers.ratelimitRemainingTokensUsageBased": check_rate_limit_header(
+            response_headers, "x-ratelimit-remaining-tokens_usage_based", True
+        ),
         "response.number_of_messages": len(messages) + len(choices),
     }
-    if OPENAI_V1:
-        full_chat_completion_summary_dict.update(
-            {
-                "response.headers.ratelimitLimitTokensUsageBased": check_rate_limit_header(
-                    response_headers, "x-ratelimit-limit-tokens_usage_based", True
-                ),
-                "response.headers.ratelimitResetTokensUsageBased": check_rate_limit_header(
-                    response_headers, "x-ratelimit-reset-tokens_usage_based", False
-                ),
-                "response.headers.ratelimitRemainingTokensUsageBased": check_rate_limit_header(
-                    response_headers, "x-ratelimit-remaining-tokens_usage_based", True
-                ),
-            }
-        )
 
     transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict)
 
@@ -772,22 +767,17 @@ async def wrap_chat_completion_async(wrapped, instance, args, kwargs):
         "response.headers.ratelimitRemainingRequests": check_rate_limit_header(
             response_headers, "x-ratelimit-remaining-requests", True
         ),
+        "response.headers.ratelimitLimitTokensUsageBased": check_rate_limit_header(
+            response_headers, "x-ratelimit-limit-tokens_usage_based", True
+        ),
+        "response.headers.ratelimitResetTokensUsageBased": check_rate_limit_header(
+            response_headers, "x-ratelimit-reset-tokens_usage_based", False
+        ),
+        "response.headers.ratelimitRemainingTokensUsageBased": check_rate_limit_header(
+            response_headers, "x-ratelimit-remaining-tokens_usage_based", True
+        ),
         "response.number_of_messages": len(messages) + len(choices),
     }
-    if OPENAI_V1:
-        full_chat_completion_summary_dict.update(
-            {
-                "response.headers.ratelimitLimitTokensUsageBased": check_rate_limit_header(
-                    response_headers, "x-ratelimit-limit-tokens_usage_based", True
-                ),
-                "response.headers.ratelimitResetTokensUsageBased": check_rate_limit_header(
-                    response_headers, "x-ratelimit-reset-tokens_usage_based", False
-                ),
-                "response.headers.ratelimitRemainingTokensUsageBased": check_rate_limit_header(
-                    response_headers, "x-ratelimit-remaining-tokens_usage_based", True
-                ),
-            }
-        )
 
     transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict)
 
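The hunks above work because check_rate_limit_header degrades gracefully when a header is missing, which is what lets the three x-ratelimit-*-tokens_usage_based lookups move out of the "if OPENAI_V1:" guard: v0 responses simply never carry those headers, so the attributes fall back to empty strings. The helper's real implementation lives elsewhere in newrelic/hooks/mlmodel_openai.py; the following is only a sketch inferred from its call sites (a header name plus a flag requesting integer casting) and from the empty-string defaults asserted in the tests below:

def check_rate_limit_header(response_headers, header_name, is_int):
    # Hypothetical sketch, not the agent's actual helper.
    # Returns "" when headers are missing or the key is absent,
    # mirroring the expected-event values in the updated tests.
    if not response_headers:
        return ""
    value = response_headers.get(header_name, "")
    if value and is_int:
        try:
            value = int(value)
        except ValueError:
            pass  # Leave unparseable values as raw strings.
    return value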
6 changes: 6 additions & 0 deletions tests/mlmodel_openai/test_chat_completion.py
@@ -63,6 +63,9 @@
         "response.headers.ratelimitResetRequests": "7m12s",
         "response.headers.ratelimitRemainingTokens": 39940,
         "response.headers.ratelimitRemainingRequests": 199,
+        "response.headers.ratelimitLimitTokensUsageBased": "",
+        "response.headers.ratelimitResetTokensUsageBased": "",
+        "response.headers.ratelimitRemainingTokensUsageBased": "",
         "vendor": "openAI",
         "ingest_source": "Python",
         "response.number_of_messages": 3,
@@ -179,6 +182,9 @@ def test_openai_chat_completion_sync_in_txn_with_convo_id(set_trace_info):
         "response.headers.ratelimitResetRequests": "7m12s",
         "response.headers.ratelimitRemainingTokens": 39940,
         "response.headers.ratelimitRemainingRequests": 199,
+        "response.headers.ratelimitLimitTokensUsageBased": "",
+        "response.headers.ratelimitResetTokensUsageBased": "",
+        "response.headers.ratelimitRemainingTokensUsageBased": "",
         "vendor": "openAI",
         "ingest_source": "Python",
         "response.number_of_messages": 3,
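For illustration, feeding the hypothetical check_rate_limit_header sketch above a v0-style header set (invented values; note the absence of any _usage_based variants) yields exactly the mix the updated expected events assert: typed values for headers that exist, and "" for the usage-based ones:

v0_headers = {
    "x-ratelimit-remaining-tokens": "39940",
    "x-ratelimit-reset-requests": "7m12s",
}

assert check_rate_limit_header(v0_headers, "x-ratelimit-remaining-tokens", True) == 39940
assert check_rate_limit_header(v0_headers, "x-ratelimit-reset-requests", False) == "7m12s"
# Headers absent in v0 come back as "" instead of raising.
assert check_rate_limit_header(v0_headers, "x-ratelimit-limit-tokens_usage_based", True) == ""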
