Add support for openai v1 completions (#1006)
* Add chat completion & header instrumentation

* Add message id tests & fix bug

* Add error tracing tests & impl for v1

* Use latest to test instead of <1.0

* Ignore v1 embedding error tests

* Capture _usage_based headers in v0

* Verify all error attrs are asserted

* Use body instead of content

* Handle body being None
hmstepanek authored Dec 15, 2023
1 parent 140c7bc commit 7b98c51
Showing 9 changed files with 1,232 additions and 92 deletions.
5 changes: 5 additions & 0 deletions newrelic/config.py
@@ -2058,6 +2058,11 @@ def _process_module_builtin_defaults():
"newrelic.hooks.mlmodel_openai",
"instrument_openai_util",
)
_process_module_definition(
"openai.resources.chat.completions",
"newrelic.hooks.mlmodel_openai",
"instrument_openai_resources_chat_completions",
)
_process_module_definition(
"openai._base_client",
"newrelic.hooks.mlmodel_openai",
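For context, _process_module_definition(module, hook_module, hook_function) registers a deferred import hook: the first time application code imports openai.resources.chat.completions, the agent imports newrelic.hooks.mlmodel_openai and calls instrument_openai_resources_chat_completions with the freshly imported module. A minimal sketch of the shape such a hook takes (the real implementation is in the hooks diff below; the _trace_create wrapper here is a stand-in, not the agent's actual wrapper):

from newrelic.common.object_wrapper import wrap_function_wrapper


def _trace_create(wrapped, instance, args, kwargs):
    # Stand-in wrapper: the real hook times the call and records LLM
    # chat-completion events plus request/response metadata around it.
    return wrapped(*args, **kwargs)


def instrument_openai_resources_chat_completions(module):
    # Invoked once by the agent when openai.resources.chat.completions is
    # first imported, per the registration above.
    wrap_function_wrapper(module, "Completions.create", _trace_create)
    wrap_function_wrapper(module, "AsyncCompletions.create", _trace_create)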
170 changes: 124 additions & 46 deletions newrelic/hooks/mlmodel_openai.py

Large diffs are not rendered by default.
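Although the hooks diff is not shown, the commit messages ("Use body instead of content", "Handle body being None") and the error tests below indicate the v1 error path now reads the parsed error body defensively. A rough sketch of that idea, assuming the standard openai>=1.0 exception attributes (the helper and attribute names are illustrative, not the agent's):

import openai


def _error_attrs(exc):
    # openai>=1.0 API errors expose a parsed `body`, which can be None for
    # some failures, so never assume a dict is present.
    attrs = {"http.statusCode": None, "error.code": None, "error.message": None}
    if isinstance(exc, openai.APIStatusError):
        attrs["http.statusCode"] = exc.status_code
        body = exc.body or {}
        error = body.get("error", body) if isinstance(body, dict) else {}
        attrs["error.code"] = error.get("code")
        attrs["error.message"] = error.get("message")
    return attrs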

66 changes: 64 additions & 2 deletions tests/mlmodel_openai/_mock_external_openai_server.py
@@ -35,7 +35,7 @@
{
"content-type": "application/json",
"openai-model": "gpt-3.5-turbo-0613",
"openai-organization": "foobar-jtbczk",
"openai-organization": "new-relic-nkmd8b",
"openai-processing-ms": "6326",
"openai-version": "2020-10-01",
"x-ratelimit-limit-requests": "200",
@@ -60,7 +60,45 @@
"index": 0,
"message": {
"role": "assistant",
"content": "To convert 212 degrees Fahrenheit to Celsius, you can use the formula:\n\n\u00b0C = (\u00b0F - 32) x 5/9\n\nSubstituting the value, we get:\n\n\u00b0C = (212 - 32) x 5/9\n = 180 x 5/9\n = 100\n\nTherefore, 212 degrees Fahrenheit is equal to 100 degrees Celsius.",
"content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.",
},
"finish_reason": "stop",
}
],
"usage": {"prompt_tokens": 26, "completion_tokens": 82, "total_tokens": 108},
"system_fingerprint": None,
},
],
"You are a mathematician.": [
{
"content-type": "application/json",
"openai-model": "gpt-3.5-turbo-0613",
"openai-organization": "new-relic-nkmd8b",
"openai-processing-ms": "6326",
"openai-version": "2020-10-01",
"x-ratelimit-limit-requests": "200",
"x-ratelimit-limit-tokens": "40000",
"x-ratelimit-limit-tokens_usage_based": "40000",
"x-ratelimit-remaining-requests": "198",
"x-ratelimit-remaining-tokens": "39880",
"x-ratelimit-remaining-tokens_usage_based": "39880",
"x-ratelimit-reset-requests": "11m32.334s",
"x-ratelimit-reset-tokens": "180ms",
"x-ratelimit-reset-tokens_usage_based": "180ms",
"x-request-id": "f8d0f53b6881c5c0a3698e55f8f410cd",
},
200,
{
"id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTeat",
"object": "chat.completion",
"created": 1701995833,
"model": "gpt-3.5-turbo-0613",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "1 plus 2 is 3.",
},
"finish_reason": "stop",
}
@@ -69,6 +107,30 @@
"system_fingerprint": None,
},
],
"Invalid API key.": [
{"content-type": "application/json; charset=utf-8", "x-request-id": "a51821b9fd83d8e0e04542bedc174310"},
401,
{
"error": {
"message": "Incorrect API key provided: DEADBEEF. You can find your API key at https://platform.openai.com/account/api-keys.",
"type": "invalid_request_error",
"param": None,
"code": "invalid_api_key",
}
},
],
"Model does not exist.": [
{"content-type": "application/json; charset=utf-8", "x-request-id": "3b0f8e510ee8a67c08a227a98eadbbe6"},
404,
{
"error": {
"message": "The model `does-not-exist` does not exist",
"type": "invalid_request_error",
"param": None,
"code": "model_not_found",
}
},
],
"This is an embedding test.": [
{
"content-type": "application/json",
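The two new error entries are keyed by the prompt text the error tests send, so a v1 error test can exercise the mocked 401 and 404 responses directly. A sketch of such a test; the sync_openai_client fixture name is an assumption here (the real fixtures live in conftest.py below), while openai.NotFoundError is the standard openai>=1.0 exception for a 404:

import openai
import pytest


def test_chat_completion_invalid_model(set_trace_info, sync_openai_client):
    set_trace_info()
    with pytest.raises(openai.NotFoundError):
        sync_openai_client.chat.completions.create(
            model="does-not-exist",
            # Matches the "Model does not exist." key in the mock server above.
            messages=[{"role": "user", "content": "Model does not exist."}],
        )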
82 changes: 42 additions & 40 deletions tests/mlmodel_openai/conftest.py
@@ -52,7 +52,9 @@
if get_openai_version() < (1, 0):
collect_ignore = [
"test_chat_completion_v1.py",
"test_chat_completion_error_v1.py",
"test_embeddings_v1.py",
"test_get_llm_message_ids_v1.py",
"test_chat_completion_error_v1.py",
"test_embeddings_error_v1.py",
]
@@ -63,6 +65,7 @@
"test_chat_completion.py",
"test_get_llm_message_ids.py",
"test_chat_completion_error.py",
"test_embeddings_error_v1.py",
]


@@ -143,9 +146,9 @@ def set_info():
def openai_server(
openai_version, # noqa: F811
openai_clients,
wrap_openai_base_client_process_response,
wrap_openai_api_requestor_request,
wrap_openai_api_requestor_interpret_response,
wrap_httpx_client_send,
):
"""
This fixture will either create a mocked backend for testing purposes, or will
@@ -165,9 +168,7 @@ def openai_server(
yield # Run tests
else:
# Apply function wrappers to record data
wrap_function_wrapper(
"openai._base_client", "BaseClient._process_response", wrap_openai_base_client_process_response
)
wrap_function_wrapper("httpx._client", "Client.send", wrap_httpx_client_send)
yield # Run tests
# Write responses to audit log
with open(OPENAI_AUDIT_LOG_FILE, "w") as audit_log_fp:
@@ -177,6 +178,43 @@
yield


def bind_send_params(request, *, stream=False, **kwargs):
return request


@pytest.fixture(scope="session")
def wrap_httpx_client_send(extract_shortened_prompt): # noqa: F811
def _wrap_httpx_client_send(wrapped, instance, args, kwargs):
request = bind_send_params(*args, **kwargs)
if not request:
return wrapped(*args, **kwargs)

params = json.loads(request.content.decode("utf-8"))
prompt = extract_shortened_prompt(params)

# Send request
response = wrapped(*args, **kwargs)

if response.status_code >= 400 or response.status_code < 200:
prompt = "error"

rheaders = getattr(response, "headers")

headers = dict(
filter(
lambda k: k[0].lower() in RECORDED_HEADERS
or k[0].lower().startswith("openai")
or k[0].lower().startswith("x-ratelimit"),
rheaders.items(),
)
)
body = json.loads(response.content.decode("utf-8"))
OPENAI_AUDIT_LOG_CONTENTS[prompt] = headers, response.status_code, body # Append response data to log
return response

return _wrap_httpx_client_send


@pytest.fixture(scope="session")
def wrap_openai_api_requestor_interpret_response():
def _wrap_openai_api_requestor_interpret_response(wrapped, instance, args, kwargs):
@@ -235,39 +273,3 @@ def bind_request_params(method, url, params=None, *args, **kwargs):

def bind_request_interpret_response_params(result, stream):
return result.content.decode("utf-8"), result.status_code, result.headers


def bind_base_client_process_response(
cast_to,
options,
response,
stream,
stream_cls,
):
return options, response


@pytest.fixture(scope="session")
def wrap_openai_base_client_process_response(extract_shortened_prompt): # noqa: F811
def _wrap_openai_base_client_process_response(wrapped, instance, args, kwargs):
options, response = bind_base_client_process_response(*args, **kwargs)
if not options:
return wrapped(*args, **kwargs)

data = getattr(options, "json_data", {})
prompt = extract_shortened_prompt(data)
rheaders = getattr(response, "headers")

headers = dict(
filter(
lambda k: k[0].lower() in RECORDED_HEADERS
or k[0].lower().startswith("openai")
or k[0].lower().startswith("x-ratelimit"),
rheaders.items(),
)
)
body = json.loads(response.content.decode("utf-8"))
OPENAI_AUDIT_LOG_CONTENTS[prompt] = headers, response.status_code, body # Append response data to audit log
return wrapped(*args, **kwargs)

return _wrap_openai_base_client_process_response
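The net effect of this conftest change is that v1 response recording moves from openai._base_client.BaseClient._process_response (removed above) down to httpx.Client.send, so the audit log captures the raw request body, status code, and headers even for error responses, while the v0 path keeps the existing APIRequestor wrappers. The same transport-level pattern can be reproduced outside pytest with wrapt directly; in this standalone sketch the names capture_send and captured are made up, and only status and headers are logged:

import httpx
import wrapt

captured = {}


def capture_send(wrapped, instance, args, kwargs):
    request = args[0] if args else kwargs["request"]
    response = wrapped(*args, **kwargs)
    # Record transport-level details before the SDK parses anything.
    captured[str(request.url)] = (response.status_code, dict(response.headers))
    return response


wrapt.wrap_function_wrapper("httpx._client", "Client.send", capture_send)

with httpx.Client() as client:
    client.get("https://example.com")
print(captured)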
6 changes: 6 additions & 0 deletions tests/mlmodel_openai/test_chat_completion.py
@@ -63,6 +63,9 @@
"response.headers.ratelimitResetRequests": "7m12s",
"response.headers.ratelimitRemainingTokens": 39940,
"response.headers.ratelimitRemainingRequests": 199,
"response.headers.ratelimitLimitTokensUsageBased": "",
"response.headers.ratelimitResetTokensUsageBased": "",
"response.headers.ratelimitRemainingTokensUsageBased": "",
"vendor": "openAI",
"ingest_source": "Python",
"response.number_of_messages": 3,
@@ -179,6 +182,9 @@ def test_openai_chat_completion_sync_in_txn_with_convo_id(set_trace_info):
"response.headers.ratelimitResetRequests": "7m12s",
"response.headers.ratelimitRemainingTokens": 39940,
"response.headers.ratelimitRemainingRequests": 199,
"response.headers.ratelimitLimitTokensUsageBased": "",
"response.headers.ratelimitResetTokensUsageBased": "",
"response.headers.ratelimitRemainingTokensUsageBased": "",
"vendor": "openAI",
"ingest_source": "Python",
"response.number_of_messages": 3,
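These new expected attributes correspond to the x-ratelimit-*-tokens_usage_based headers added to the mock responses earlier in this commit ("Capture _usage_based headers in v0"). A hypothetical sketch of the header-to-attribute translation that yields names like ratelimitLimitTokensUsageBased; the helper is illustrative, not the agent's actual implementation:

def _ratelimit_usage_based_attrs(response_headers):
    # Translate raw OpenAI usage-based rate-limit headers into the camelCase
    # event attributes asserted above.
    header_to_attr = {
        "x-ratelimit-limit-tokens_usage_based": "response.headers.ratelimitLimitTokensUsageBased",
        "x-ratelimit-reset-tokens_usage_based": "response.headers.ratelimitResetTokensUsageBased",
        "x-ratelimit-remaining-tokens_usage_based": "response.headers.ratelimitRemainingTokensUsageBased",
    }
    return {attr: response_headers[header] for header, attr in header_to_attr.items() if header in response_headers}


# With the mocked headers from _mock_external_openai_server.py:
print(_ratelimit_usage_based_attrs({"x-ratelimit-limit-tokens_usage_based": "40000"}))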
(Remaining file diffs not rendered.)
