Add support for openai v1 completions #1006

Merged
merged 9 commits on Dec 15, 2023
5 changes: 5 additions & 0 deletions newrelic/config.py
@@ -2058,6 +2058,11 @@ def _process_module_builtin_defaults():
"newrelic.hooks.mlmodel_openai",
"instrument_openai_util",
)
_process_module_definition(
"openai.resources.chat.completions",
"newrelic.hooks.mlmodel_openai",
"instrument_openai_resources_chat_completions",
)
_process_module_definition(
"openai._base_client",
"newrelic.hooks.mlmodel_openai",
170 changes: 124 additions & 46 deletions newrelic/hooks/mlmodel_openai.py

Large diffs are not rendered by default.
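The bulk of the new instrumentation lives in this collapsed file. As a rough, hypothetical sketch only (the real hook is not rendered in this view), a hook registered as instrument_openai_resources_chat_completions in config.py above would typically wrap the v1 chat-completion entry points with the agent's wrap_function_wrapper helper:

```python
# Hypothetical sketch -- the actual hook in mlmodel_openai.py is not shown here.
from newrelic.common.object_wrapper import wrap_function_wrapper


def wrap_chat_completion_sync(wrapped, instance, args, kwargs):
    # Placeholder for the real logic, which records LLM events and timing
    # around the underlying API call before returning its result.
    return wrapped(*args, **kwargs)


def instrument_openai_resources_chat_completions(module):
    # In openai>=1.0 the sync and async entry points live on
    # openai.resources.chat.completions.Completions / AsyncCompletions.
    if hasattr(module, "Completions"):
        wrap_function_wrapper(module, "Completions.create", wrap_chat_completion_sync)
    if hasattr(module, "AsyncCompletions"):
        wrap_function_wrapper(module, "AsyncCompletions.create", wrap_chat_completion_sync)
```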

66 changes: 64 additions & 2 deletions tests/mlmodel_openai/_mock_external_openai_server.py
@@ -35,7 +35,7 @@
{
"content-type": "application/json",
"openai-model": "gpt-3.5-turbo-0613",
"openai-organization": "foobar-jtbczk",
"openai-organization": "new-relic-nkmd8b",
"openai-processing-ms": "6326",
"openai-version": "2020-10-01",
"x-ratelimit-limit-requests": "200",
@@ -60,7 +60,45 @@
"index": 0,
"message": {
"role": "assistant",
"content": "To convert 212 degrees Fahrenheit to Celsius, you can use the formula:\n\n\u00b0C = (\u00b0F - 32) x 5/9\n\nSubstituting the value, we get:\n\n\u00b0C = (212 - 32) x 5/9\n = 180 x 5/9\n = 100\n\nTherefore, 212 degrees Fahrenheit is equal to 100 degrees Celsius.",
"content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.",
},
"finish_reason": "stop",
}
],
"usage": {"prompt_tokens": 26, "completion_tokens": 82, "total_tokens": 108},
"system_fingerprint": None,
},
],
"You are a mathematician.": [
{
"content-type": "application/json",
"openai-model": "gpt-3.5-turbo-0613",
"openai-organization": "new-relic-nkmd8b",
"openai-processing-ms": "6326",
"openai-version": "2020-10-01",
"x-ratelimit-limit-requests": "200",
"x-ratelimit-limit-tokens": "40000",
"x-ratelimit-limit-tokens_usage_based": "40000",
"x-ratelimit-remaining-requests": "198",
"x-ratelimit-remaining-tokens": "39880",
"x-ratelimit-remaining-tokens_usage_based": "39880",
"x-ratelimit-reset-requests": "11m32.334s",
"x-ratelimit-reset-tokens": "180ms",
"x-ratelimit-reset-tokens_usage_based": "180ms",
"x-request-id": "f8d0f53b6881c5c0a3698e55f8f410cd",
},
200,
{
"id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTeat",
"object": "chat.completion",
"created": 1701995833,
"model": "gpt-3.5-turbo-0613",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "1 plus 2 is 3.",
},
"finish_reason": "stop",
}
@@ -69,6 +107,30 @@
"system_fingerprint": None,
},
],
"Invalid API key.": [
{"content-type": "application/json; charset=utf-8", "x-request-id": "a51821b9fd83d8e0e04542bedc174310"},
401,
{
"error": {
"message": "Incorrect API key provided: DEADBEEF. You can find your API key at https://platform.openai.com/account/api-keys.",
"type": "invalid_request_error",
"param": None,
"code": "invalid_api_key",
}
},
],
"Model does not exist.": [
{"content-type": "application/json; charset=utf-8", "x-request-id": "3b0f8e510ee8a67c08a227a98eadbbe6"},
404,
{
"error": {
"message": "The model `does-not-exist` does not exist",
"type": "invalid_request_error",
"param": None,
"code": "model_not_found",
}
},
],
"This is an embedding test.": [
{
"content-type": "application/json",
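The two error fixtures above are keyed on the prompts "Invalid API key." and "Model does not exist.". For context, here is a minimal sketch of how those canned 401/404 responses surface through the openai v1 client; the client construction (a base_url pointing at the mock server) is an assumption, not part of this diff:

```python
import openai
import pytest

# Assumed setup -- the real tests build their clients via fixtures.
client = openai.OpenAI(api_key="DEADBEEF", base_url="http://localhost:8000")

# The 404 "model_not_found" fixture maps to openai.NotFoundError in the v1 client.
with pytest.raises(openai.NotFoundError):
    client.chat.completions.create(
        model="does-not-exist",
        messages=[{"role": "user", "content": "Model does not exist."}],
    )

# The 401 "invalid_api_key" fixture maps to openai.AuthenticationError.
with pytest.raises(openai.AuthenticationError):
    client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Invalid API key."}],
    )
```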
82 changes: 42 additions & 40 deletions tests/mlmodel_openai/conftest.py
@@ -52,7 +52,9 @@
if get_openai_version() < (1, 0):
collect_ignore = [
"test_chat_completion_v1.py",
"test_chat_completion_error_v1.py",
"test_embeddings_v1.py",
"test_get_llm_message_ids_v1.py",
"test_chat_completion_error_v1.py",
"test_embeddings_error_v1.py",
]
@@ -63,6 +65,7 @@
"test_chat_completion.py",
"test_get_llm_message_ids.py",
"test_chat_completion_error.py",
"test_embeddings_error_v1.py",
]


@@ -143,9 +146,9 @@ def set_info():
def openai_server(
openai_version, # noqa: F811
openai_clients,
wrap_openai_base_client_process_response,
wrap_openai_api_requestor_request,
wrap_openai_api_requestor_interpret_response,
wrap_httpx_client_send,
):
"""
This fixture will either create a mocked backend for testing purposes, or will
@@ -165,9 +168,7 @@
yield # Run tests
else:
# Apply function wrappers to record data
wrap_function_wrapper(
"openai._base_client", "BaseClient._process_response", wrap_openai_base_client_process_response
)
wrap_function_wrapper("httpx._client", "Client.send", wrap_httpx_client_send)
yield # Run tests
# Write responses to audit log
with open(OPENAI_AUDIT_LOG_FILE, "w") as audit_log_fp:
@@ -177,6 +178,43 @@
yield


def bind_send_params(request, *, stream=False, **kwargs):
return request


@pytest.fixture(scope="session")
def wrap_httpx_client_send(extract_shortened_prompt): # noqa: F811
def _wrap_httpx_client_send(wrapped, instance, args, kwargs):
request = bind_send_params(*args, **kwargs)
if not request:
return wrapped(*args, **kwargs)

params = json.loads(request.content.decode("utf-8"))
prompt = extract_shortened_prompt(params)

# Send request
response = wrapped(*args, **kwargs)

if response.status_code >= 400 or response.status_code < 200:
prompt = "error"

rheaders = getattr(response, "headers")

headers = dict(
filter(
lambda k: k[0].lower() in RECORDED_HEADERS
or k[0].lower().startswith("openai")
or k[0].lower().startswith("x-ratelimit"),
rheaders.items(),
)
)
body = json.loads(response.content.decode("utf-8"))
OPENAI_AUDIT_LOG_CONTENTS[prompt] = headers, response.status_code, body # Append response data to log
return response

return _wrap_httpx_client_send
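The wrapper above stores one entry per shortened prompt in OPENAI_AUDIT_LOG_CONTENTS, matching the shape of the canned entries in _mock_external_openai_server.py. A sketch of that shape (placeholder values, not recorded data):

```python
# Placeholder values illustrating the (headers, status_code, body) entry
# written per prompt by _wrap_httpx_client_send above.
OPENAI_AUDIT_LOG_CONTENTS["You are a mathematician."] = (
    {"content-type": "application/json", "openai-model": "gpt-3.5-turbo-0613"},
    200,
    {"id": "chatcmpl-...", "object": "chat.completion", "choices": []},
)
```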


@pytest.fixture(scope="session")
def wrap_openai_api_requestor_interpret_response():
def _wrap_openai_api_requestor_interpret_response(wrapped, instance, args, kwargs):
@@ -235,39 +273,3 @@ def bind_request_params(method, url, params=None, *args, **kwargs):

def bind_request_interpret_response_params(result, stream):
return result.content.decode("utf-8"), result.status_code, result.headers


def bind_base_client_process_response(
cast_to,
options,
response,
stream,
stream_cls,
):
return options, response


@pytest.fixture(scope="session")
def wrap_openai_base_client_process_response(extract_shortened_prompt): # noqa: F811
def _wrap_openai_base_client_process_response(wrapped, instance, args, kwargs):
options, response = bind_base_client_process_response(*args, **kwargs)
if not options:
return wrapped(*args, **kwargs)

data = getattr(options, "json_data", {})
prompt = extract_shortened_prompt(data)
rheaders = getattr(response, "headers")

headers = dict(
filter(
lambda k: k[0].lower() in RECORDED_HEADERS
or k[0].lower().startswith("openai")
or k[0].lower().startswith("x-ratelimit"),
rheaders.items(),
)
)
body = json.loads(response.content.decode("utf-8"))
OPENAI_AUDIT_LOG_CONTENTS[prompt] = headers, response.status_code, body # Append response data to audit log
return wrapped(*args, **kwargs)

return _wrap_openai_base_client_process_response
6 changes: 6 additions & 0 deletions tests/mlmodel_openai/test_chat_completion.py
@@ -63,6 +63,9 @@
"response.headers.ratelimitResetRequests": "7m12s",
"response.headers.ratelimitRemainingTokens": 39940,
"response.headers.ratelimitRemainingRequests": 199,
"response.headers.ratelimitLimitTokensUsageBased": "",
"response.headers.ratelimitResetTokensUsageBased": "",
"response.headers.ratelimitRemainingTokensUsageBased": "",
"vendor": "openAI",
"ingest_source": "Python",
"response.number_of_messages": 3,
@@ -179,6 +182,9 @@ def test_openai_chat_completion_sync_in_txn_with_convo_id(set_trace_info):
"response.headers.ratelimitResetRequests": "7m12s",
"response.headers.ratelimitRemainingTokens": 39940,
"response.headers.ratelimitRemainingRequests": 199,
"response.headers.ratelimitLimitTokensUsageBased": "",
"response.headers.ratelimitResetTokensUsageBased": "",
"response.headers.ratelimitRemainingTokensUsageBased": "",
"vendor": "openAI",
"ingest_source": "Python",
"response.number_of_messages": 3,
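The v1-specific tests (test_chat_completion_v1.py, referenced in conftest.py above) are not rendered in this view. As a rough sketch only, the kind of v1 client call they exercise looks like the following; the client construction and message contents are assumptions, since the real tests obtain their clients from the openai_clients fixture:

```python
import openai

# Assumed setup -- not the actual test code.
client = openai.OpenAI(api_key="NOT-A-REAL-SECRET", base_url="http://localhost:8000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "You are a mathematician."},
        {"role": "user", "content": "What is 1 plus 2?"},
    ],
    temperature=0.7,
    max_tokens=100,
)
print(response.choices[0].message.content)
```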