Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create token_count attribute #2489

Merged
merged 16 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions lib/new_relic/agent/instrumentation/ruby_openai/instrumentation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def add_chat_completion_response_params(parameters, response, event)

# Copies response-derived attributes onto the embedding event:
# the model reported by the API response, and a token count computed
# via the customer-supplied callback (if one is registered).
def add_embeddings_response_params(response, event)
  event.token_count = calculate_token_count(event.request_model, event.input)
  event.response_model = response['model']
end

def create_chat_completion_messages(parameters, summary_id)
Expand All @@ -101,8 +102,7 @@ def create_chat_completion_messages(parameters, summary_id)
role: message[:role] || message['role'],
sequence: index,
completion_id: summary_id,
vendor: VENDOR,
is_response: true
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an unrelated fix. This first group of messages is not from a response — they're from a request. The is_response attribute should only be attached to the event if it has a truthy value.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice

vendor: VENDOR
)
end
end
Expand All @@ -129,9 +129,21 @@ def update_chat_completion_messages(messages, response, summary)
message.request_id = summary.request_id
message.response_model = response['model']
message.metadata = llm_custom_attributes

model = message.is_response ? message.response_model : summary.request_model

message.token_count = calculate_token_count(model, message.content)
end
end

# Computes a token count for the given model/content pair by invoking the
# customer-registered callback, if any.
#
# Returns the callback's result only when it is a positive Integer;
# returns nil otherwise (no callback registered, or the callback returned
# nil, zero, a negative number, or a non-Integer value), so the
# token_count attribute is simply omitted from the event in those cases.
def calculate_token_count(model, content)
  # TODO: uncomment once the ai_monitoring config PR is merged
  # return unless NewRelic::Agent.config['ai_monitoring.record_content.enabled']
  return unless NewRelic::Agent.llm_token_count_callback

  count = NewRelic::Agent.llm_token_count_callback.call({model: model, content: content})

  # Guard against callbacks that return junk: only a positive Integer is a
  # valid token count. The original `return count unless count.is_a?(Integer)
  # && count <= 0` would have passed through non-Integer truthy values
  # (e.g. a Float or String), which the tests say must not be recorded.
  count if count.is_a?(Integer) && count.positive?
end

def llm_custom_attributes
attributes = NewRelic::Agent::Tracer.current_transaction&.attributes&.custom_attributes&.select { |k| k.to_s.match(/llm.*/) }

Expand Down
2 changes: 1 addition & 1 deletion lib/new_relic/agent/llm/chat_completion_message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ module NewRelic
module Agent
module Llm
class ChatCompletionMessage < LlmEvent
ATTRIBUTES = %i[content role sequence completion_id is_response]
kaylareopelle marked this conversation as resolved.
Show resolved Hide resolved
ATTRIBUTES = %i[content role sequence token_count is_response]
EVENT_NAME = 'LlmChatCompletionMessage'

attr_accessor(*ATTRIBUTES)
Expand Down
2 changes: 1 addition & 1 deletion lib/new_relic/agent/llm/embedding.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ module Llm
class Embedding < LlmEvent
include ResponseHeaders

ATTRIBUTES = %i[input request_model duration error]
ATTRIBUTES = %i[input request_model token_count duration error]
ATTRIBUTE_NAME_EXCEPTIONS = {
request_model: 'request.model'
}
Expand Down
9 changes: 8 additions & 1 deletion test/multiverse/suites/ruby_openai/openai_helpers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,13 @@ def faraday_connection.post(*args); ChatResponse.new; end
faraday_connection
end

# Builds a Faraday connection whose #post is stubbed to return a canned
# EmbeddingsResponse, so embeddings tests never make a real HTTP request.
def embedding_faraday_connection
  connection = Faraday.new
  connection.define_singleton_method(:post) { |*_args| EmbeddingsResponse.new }
  connection
end

def error_faraday_connection
faraday_connection = Faraday.new
def faraday_connection.post(*args); raise 'deception'; end
Expand Down Expand Up @@ -201,7 +208,7 @@ def stub_embeddings_post_request(&blk)
yield
end
else
connection_client.stub(:conn, faraday_connection) do
connection_client.stub(:conn, embedding_faraday_connection) do
yield
end
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@

class RubyOpenAIInstrumentationTest < Minitest::Test
include OpenAIHelpers
# some of the private methods are too difficult to stub
# we can test them directly by including the module
include NewRelic::Agent::Instrumentation::OpenAI

def setup
@aggregator = NewRelic::Agent.agent.custom_event_aggregator
Expand Down Expand Up @@ -234,12 +237,94 @@ def test_embedding_event_sets_error_true_if_raised
end

# Verifies the :llm intrinsic attribute is set on transactions that record
# an embeddings call. (The scraped diff contained both the old
# `in_transaction do |txn|` line and its replacement; only the new,
# parameterless form belongs in the file.)
def test_set_llm_agent_attribute_on_embedding_transaction
  in_transaction do
    stub_embeddings_post_request do
      client.embeddings(parameters: embeddings_params)
    end
  end

  assert_truthy harvest_transaction_events![1][0][2][:llm]
end

# When a token count callback is registered, its (positive Integer) return
# value is recorded as the embedding event's token_count attribute.
def test_embeddings_token_count_assigned_by_callback_if_present
  NewRelic::Agent.set_llm_token_count_callback(proc { |hash| 7734 })

  in_transaction do
    stub_embeddings_post_request do
      client.embeddings(parameters: embeddings_params)
    end
  end

  _, events = @aggregator.harvest!
  embedding_event = events.find { |event| event[0]['type'] == NewRelic::Agent::Llm::Embedding::EVENT_NAME }

  assert_equal 7734, embedding_event[1]['token_count']
ensure
  # ensure the callback is always cleared — a failed assertion above must
  # not leak the callback into other tests
  NewRelic::Agent.remove_instance_variable(:@llm_token_count_callback)
end

# A callback returning nil must not produce a token_count attribute.
def test_embeddings_token_count_attribute_absent_if_callback_returns_nil
  NewRelic::Agent.set_llm_token_count_callback(proc { |hash| nil })

  in_transaction do
    stub_embeddings_post_request do
      client.embeddings(parameters: embeddings_params)
    end
  end

  _, events = @aggregator.harvest!
  embedding_event = events.find { |event| event[0]['type'] == NewRelic::Agent::Llm::Embedding::EVENT_NAME }

  refute embedding_event[1].key?('token_count')
ensure
  # ensure the callback is always cleared — a failed assertion above must
  # not leak the callback into other tests
  NewRelic::Agent.remove_instance_variable(:@llm_token_count_callback)
end

# A callback returning zero must not produce a token_count attribute.
def test_embeddings_token_count_attribute_absent_if_callback_returns_zero
  NewRelic::Agent.set_llm_token_count_callback(proc { |hash| 0 })

  in_transaction do
    stub_embeddings_post_request do
      client.embeddings(parameters: embeddings_params)
    end
  end

  _, events = @aggregator.harvest!
  embedding_event = events.find { |event| event[0]['type'] == NewRelic::Agent::Llm::Embedding::EVENT_NAME }

  refute embedding_event[1].key?('token_count')
ensure
  # ensure the callback is always cleared — a failed assertion above must
  # not leak the callback into other tests
  NewRelic::Agent.remove_instance_variable(:@llm_token_count_callback)
end

# With no token count callback registered at all, the embedding event is
# recorded without a token_count attribute.
def test_embeddings_token_count_attribute_absent_if_no_callback_available
  assert_nil NewRelic::Agent.llm_token_count_callback

  in_transaction do
    stub_embeddings_post_request do
      client.embeddings(parameters: embeddings_params)
    end
  end

  _, events = @aggregator.harvest!
  embedding_event = events.find do |event|
    event[0]['type'] == NewRelic::Agent::Llm::Embedding::EVENT_NAME
  end

  refute embedding_event[1].key?('token_count')
end

# Placeholder tests mirroring the embeddings token_count scenarios above for
# chat completion messages. An empty test body passes vacuously (false
# green); calling `skip` makes Minitest report each as pending instead.
def test_chat_completion_message_token_count_assigned_by_callback_if_present
  skip 'TODO: implement chat completion token_count callback test'
end

def test_chat_completion_message_token_count_attribute_absent_if_callback_returns_nil
  skip 'TODO: implement chat completion token_count nil-callback test'
end

def test_chat_completion_message_token_count_attribute_absent_if_callback_returns_zero
  skip 'TODO: implement chat completion token_count zero-callback test'
end

def test_chat_completion_message_token_count_attribute_absent_if_no_callback_available
  skip 'TODO: implement chat completion token_count no-callback test'
end
end
4 changes: 2 additions & 2 deletions test/new_relic/agent/llm/chat_completion_message_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ def test_record_creates_an_event
message.response_model = 'gpt-4'
message.vendor = 'OpenAI'
message.role = 'system'
message.completion_id = 123
message.is_response = 'true'
message.token_count = 10

message.record
_, events = NewRelic::Agent.agent.custom_event_aggregator.harvest!
Expand All @@ -73,8 +73,8 @@ def test_record_creates_an_event
assert_equal 'Red-Tailed Hawk', attributes['content']
assert_equal 'system', attributes['role']
assert_equal 2, attributes['sequence']
assert_equal 123, attributes['completion_id']
assert_equal 'true', attributes['is_response']
assert_equal 10, attributes['token_count']
end
end
end
Expand Down
2 changes: 2 additions & 0 deletions test/new_relic/agent/llm/embedding_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def test_record_creates_an_event
embedding.vendor = 'OpenAI'
embedding.duration = '500'
embedding.error = 'true'
embedding.token_count = 10
embedding.llm_version = '2022-01-01'
embedding.rate_limit_requests = '100'
embedding.rate_limit_tokens = '101'
Expand All @@ -79,6 +80,7 @@ def test_record_creates_an_event
assert_equal 'Ruby', attributes['ingest_source']
assert_equal '500', attributes['duration']
assert_equal 'true', attributes['error']
assert_equal 10, attributes['token_count']
assert_equal '2022-01-01', attributes['response.headers.llm_version']
assert_equal '100', attributes['response.headers.ratelimitLimitRequests']
assert_equal '101', attributes['response.headers.ratelimitLimitTokens']
Expand Down
Loading