Add support for openai v1 completions (#1006)
* Add chat completion & header instrumentation

* Add message id tests & fix bug

* Add error tracing tests & impl for v1

* Use latest to test instead of <1.0

* Ignore v1 embedding error tests

* Capture _usage_based headers in v0

* Verify all error attrs are asserted

* Use body instead of content

* Handle body being None
hmstepanek authored Dec 15, 2023
1 parent 140c7bc commit 7b98c51
Showing 9 changed files with 1,232 additions and 92 deletions.
5 changes: 5 additions & 0 deletions newrelic/config.py
@@ -2058,6 +2058,11 @@ def _process_module_builtin_defaults():
"newrelic.hooks.mlmodel_openai",
"instrument_openai_util",
)
_process_module_definition(
"openai.resources.chat.completions",
"newrelic.hooks.mlmodel_openai",
"instrument_openai_resources_chat_completions",
)
_process_module_definition(
"openai._base_client",
"newrelic.hooks.mlmodel_openai",
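For context, _process_module_definition(module, hook_module, hook_function) registers a deferred import hook: the first time application code imports openai.resources.chat.completions, the agent imports newrelic.hooks.mlmodel_openai and calls instrument_openai_resources_chat_completions with the freshly imported module. A minimal sketch of the shape such a hook takes (the real implementation is in the hooks diff below; the _trace_create wrapper here is a stand-in, not the agent's actual wrapper):

from newrelic.common.object_wrapper import wrap_function_wrapper


def _trace_create(wrapped, instance, args, kwargs):
    # Stand-in wrapper: the real hook times the call and records LLM
    # chat-completion events plus request/response metadata around it.
    return wrapped(*args, **kwargs)


def instrument_openai_resources_chat_completions(module):
    # Invoked once by the agent when openai.resources.chat.completions is
    # first imported, per the registration above.
    wrap_function_wrapper(module, "Completions.create", _trace_create)
    wrap_function_wrapper(module, "AsyncCompletions.create", _trace_create)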
170 changes: 124 additions & 46 deletions newrelic/hooks/mlmodel_openai.py

Large diffs are not rendered by default.
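Although the hooks diff is not shown, the commit messages ("Use body instead of content", "Handle body being None") and the error tests below indicate the v1 error path now reads the parsed error body defensively. A rough sketch of that idea, assuming the standard openai>=1.0 exception attributes (the helper and attribute names are illustrative, not the agent's):

import openai


def _error_attrs(exc):
    # openai>=1.0 API errors expose a parsed `body`, which can be None for
    # some failures, so never assume a dict is present.
    attrs = {"http.statusCode": None, "error.code": None, "error.message": None}
    if isinstance(exc, openai.APIStatusError):
        attrs["http.statusCode"] = exc.status_code
        body = exc.body or {}
        error = body.get("error", body) if isinstance(body, dict) else {}
        attrs["error.code"] = error.get("code")
        attrs["error.message"] = error.get("message")
    return attrs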

66 changes: 64 additions & 2 deletions tests/mlmodel_openai/_mock_external_openai_server.py
@@ -35,7 +35,7 @@
{
"content-type": "application/json",
"openai-model": "gpt-3.5-turbo-0613",
"openai-organization": "foobar-jtbczk",
"openai-organization": "new-relic-nkmd8b",
"openai-processing-ms": "6326",
"openai-version": "2020-10-01",
"x-ratelimit-limit-requests": "200",
@@ -60,7 +60,45 @@
"index": 0,
"message": {
"role": "assistant",
"content": "To convert 212 degrees Fahrenheit to Celsius, you can use the formula:\n\n\u00b0C = (\u00b0F - 32) x 5/9\n\nSubstituting the value, we get:\n\n\u00b0C = (212 - 32) x 5/9\n = 180 x 5/9\n = 100\n\nTherefore, 212 degrees Fahrenheit is equal to 100 degrees Celsius.",
"content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.",
},
"finish_reason": "stop",
}
],
"usage": {"prompt_tokens": 26, "completion_tokens": 82, "total_tokens": 108},
"system_fingerprint": None,
},
],
"You are a mathematician.": [
{
"content-type": "application/json",
"openai-model": "gpt-3.5-turbo-0613",
"openai-organization": "new-relic-nkmd8b",
"openai-processing-ms": "6326",
"openai-version": "2020-10-01",
"x-ratelimit-limit-requests": "200",
"x-ratelimit-limit-tokens": "40000",
"x-ratelimit-limit-tokens_usage_based": "40000",
"x-ratelimit-remaining-requests": "198",
"x-ratelimit-remaining-tokens": "39880",
"x-ratelimit-remaining-tokens_usage_based": "39880",
"x-ratelimit-reset-requests": "11m32.334s",
"x-ratelimit-reset-tokens": "180ms",
"x-ratelimit-reset-tokens_usage_based": "180ms",
"x-request-id": "f8d0f53b6881c5c0a3698e55f8f410cd",
},
200,
{
"id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTeat",
"object": "chat.completion",
"created": 1701995833,
"model": "gpt-3.5-turbo-0613",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "1 plus 2 is 3.",
},
"finish_reason": "stop",
}
@@ -69,6 +107,30 @@
"system_fingerprint": None,
},
],
"Invalid API key.": [
{"content-type": "application/json; charset=utf-8", "x-request-id": "a51821b9fd83d8e0e04542bedc174310"},
401,
{
"error": {
"message": "Incorrect API key provided: DEADBEEF. You can find your API key at https://platform.openai.com/account/api-keys.",
"type": "invalid_request_error",
"param": None,
"code": "invalid_api_key",
}
},
],
"Model does not exist.": [
{"content-type": "application/json; charset=utf-8", "x-request-id": "3b0f8e510ee8a67c08a227a98eadbbe6"},
404,
{
"error": {
"message": "The model `does-not-exist` does not exist",
"type": "invalid_request_error",
"param": None,
"code": "model_not_found",
}
},
],
"This is an embedding test.": [
{
"content-type": "application/json",
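The two new error entries are keyed by the prompt text the error tests send, so a v1 error test can exercise the mocked 401 and 404 responses directly. A sketch of such a test; the sync_openai_client fixture name is an assumption here (the real fixtures live in conftest.py below), while openai.NotFoundError is the standard openai>=1.0 exception for a 404:

import openai
import pytest


def test_chat_completion_invalid_model(set_trace_info, sync_openai_client):
    set_trace_info()
    with pytest.raises(openai.NotFoundError):
        sync_openai_client.chat.completions.create(
            model="does-not-exist",
            # Matches the "Model does not exist." key in the mock server above.
            messages=[{"role": "user", "content": "Model does not exist."}],
        )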
82 changes: 42 additions & 40 deletions tests/mlmodel_openai/conftest.py
@@ -52,7 +52,9 @@
if get_openai_version() < (1, 0):
collect_ignore = [
"test_chat_completion_v1.py",
"test_chat_completion_error_v1.py",
"test_embeddings_v1.py",
"test_get_llm_message_ids_v1.py",
"test_chat_completion_error_v1.py",
"test_embeddings_error_v1.py",
]
@@ -63,6 +65,7 @@
"test_chat_completion.py",
"test_get_llm_message_ids.py",
"test_chat_completion_error.py",
"test_embeddings_error_v1.py",
]


@@ -143,9 +146,9 @@ def set_info():
def openai_server(
openai_version, # noqa: F811
openai_clients,
wrap_openai_base_client_process_response,
wrap_openai_api_requestor_request,
wrap_openai_api_requestor_interpret_response,
wrap_httpx_client_send,
):
"""
This fixture will either create a mocked backend for testing purposes, or will
@@ -165,9 +168,7 @@ def openai_server(
yield # Run tests
else:
# Apply function wrappers to record data
wrap_function_wrapper(
"openai._base_client", "BaseClient._process_response", wrap_openai_base_client_process_response
)
wrap_function_wrapper("httpx._client", "Client.send", wrap_httpx_client_send)
yield # Run tests
# Write responses to audit log
with open(OPENAI_AUDIT_LOG_FILE, "w") as audit_log_fp:
@@ -177,6 +178,43 @@
yield


def bind_send_params(request, *, stream=False, **kwargs):
return request


@pytest.fixture(scope="session")
def wrap_httpx_client_send(extract_shortened_prompt): # noqa: F811
def _wrap_httpx_client_send(wrapped, instance, args, kwargs):
request = bind_send_params(*args, **kwargs)
if not request:
return wrapped(*args, **kwargs)

params = json.loads(request.content.decode("utf-8"))
prompt = extract_shortened_prompt(params)

# Send request
response = wrapped(*args, **kwargs)

if response.status_code >= 400 or response.status_code < 200:
prompt = "error"

rheaders = getattr(response, "headers")

headers = dict(
filter(
lambda k: k[0].lower() in RECORDED_HEADERS
or k[0].lower().startswith("openai")
or k[0].lower().startswith("x-ratelimit"),
rheaders.items(),
)
)
body = json.loads(response.content.decode("utf-8"))
OPENAI_AUDIT_LOG_CONTENTS[prompt] = headers, response.status_code, body # Append response data to log
return response

return _wrap_httpx_client_send


@pytest.fixture(scope="session")
def wrap_openai_api_requestor_interpret_response():
def _wrap_openai_api_requestor_interpret_response(wrapped, instance, args, kwargs):
@@ -235,39 +273,3 @@ def bind_request_params(method, url, params=None, *args, **kwargs):

def bind_request_interpret_response_params(result, stream):
return result.content.decode("utf-8"), result.status_code, result.headers


def bind_base_client_process_response(
cast_to,
options,
response,
stream,
stream_cls,
):
return options, response


@pytest.fixture(scope="session")
def wrap_openai_base_client_process_response(extract_shortened_prompt): # noqa: F811
def _wrap_openai_base_client_process_response(wrapped, instance, args, kwargs):
options, response = bind_base_client_process_response(*args, **kwargs)
if not options:
return wrapped(*args, **kwargs)

data = getattr(options, "json_data", {})
prompt = extract_shortened_prompt(data)
rheaders = getattr(response, "headers")

headers = dict(
filter(
lambda k: k[0].lower() in RECORDED_HEADERS
or k[0].lower().startswith("openai")
or k[0].lower().startswith("x-ratelimit"),
rheaders.items(),
)
)
body = json.loads(response.content.decode("utf-8"))
OPENAI_AUDIT_LOG_CONTENTS[prompt] = headers, response.status_code, body # Append response data to audit log
return wrapped(*args, **kwargs)

return _wrap_openai_base_client_process_response
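The net effect of this conftest change is that v1 response recording moves from openai._base_client.BaseClient._process_response (removed above) down to httpx.Client.send, so the audit log captures the raw request body, status code, and headers even for error responses, while the v0 path keeps the existing APIRequestor wrappers. The same transport-level pattern can be reproduced outside pytest with wrapt directly; in this standalone sketch the names capture_send and captured are made up, and only status and headers are logged:

import httpx
import wrapt

captured = {}


def capture_send(wrapped, instance, args, kwargs):
    request = args[0] if args else kwargs["request"]
    response = wrapped(*args, **kwargs)
    # Record transport-level details before the SDK parses anything.
    captured[str(request.url)] = (response.status_code, dict(response.headers))
    return response


wrapt.wrap_function_wrapper("httpx._client", "Client.send", capture_send)

with httpx.Client() as client:
    client.get("https://example.com")
print(captured)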
6 changes: 6 additions & 0 deletions tests/mlmodel_openai/test_chat_completion.py
@@ -63,6 +63,9 @@
"response.headers.ratelimitResetRequests": "7m12s",
"response.headers.ratelimitRemainingTokens": 39940,
"response.headers.ratelimitRemainingRequests": 199,
"response.headers.ratelimitLimitTokensUsageBased": "",
"response.headers.ratelimitResetTokensUsageBased": "",
"response.headers.ratelimitRemainingTokensUsageBased": "",
"vendor": "openAI",
"ingest_source": "Python",
"response.number_of_messages": 3,
@@ -179,6 +182,9 @@ def test_openai_chat_completion_sync_in_txn_with_convo_id(set_trace_info):
"response.headers.ratelimitResetRequests": "7m12s",
"response.headers.ratelimitRemainingTokens": 39940,
"response.headers.ratelimitRemainingRequests": 199,
"response.headers.ratelimitLimitTokensUsageBased": "",
"response.headers.ratelimitResetTokensUsageBased": "",
"response.headers.ratelimitRemainingTokensUsageBased": "",
"vendor": "openAI",
"ingest_source": "Python",
"response.number_of_messages": 3,
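These new expected attributes correspond to the x-ratelimit-*-tokens_usage_based headers added to the mock responses earlier in this commit ("Capture _usage_based headers in v0"). A hypothetical sketch of the header-to-attribute translation that yields names like ratelimitLimitTokensUsageBased; the helper is illustrative, not the agent's actual implementation:

def _ratelimit_usage_based_attrs(response_headers):
    # Translate raw OpenAI usage-based rate-limit headers into the camelCase
    # event attributes asserted above.
    header_to_attr = {
        "x-ratelimit-limit-tokens_usage_based": "response.headers.ratelimitLimitTokensUsageBased",
        "x-ratelimit-reset-tokens_usage_based": "response.headers.ratelimitResetTokensUsageBased",
        "x-ratelimit-remaining-tokens_usage_based": "response.headers.ratelimitRemainingTokensUsageBased",
    }
    return {attr: response_headers[header] for header, attr in header_to_attr.items() if header in response_headers}


# With the mocked headers from _mock_external_openai_server.py:
print(_ratelimit_usage_based_attrs({"x-ratelimit-limit-tokens_usage_based": "40000"}))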
(Remaining file diffs not rendered.)
