feat: implement llm monitoring with langchainrb integration
1 parent 04e030f · commit 3640a7d · Showing 5 changed files with 222 additions and 0 deletions.
@@ -0,0 +1,123 @@
# frozen_string_literal: true

require 'langchain'

module Sentry
  module AI
    module Langchain
      def self.patch_langchain_llms
        # List of all LLM classes shipped by langchainrb
        llm_classes = [
          ::Langchain::LLM::AI21,
          ::Langchain::LLM::Anthropic,
          ::Langchain::LLM::Azure,
          ::Langchain::LLM::Cohere,
          ::Langchain::LLM::GooglePalm,
          ::Langchain::LLM::GoogleVertexAI,
          ::Langchain::LLM::GoogleGemini,
          ::Langchain::LLM::HuggingFace,
          ::Langchain::LLM::LlamaCpp,
          ::Langchain::LLM::OpenAI,
          ::Langchain::LLM::Replicate
        ]

        llm_classes.each do |llm_class|
          patch_llm_class(llm_class)
        end
      end

      def self.patch_llm_class(llm_class)
        llm_class.prepend(LangchainLLMPatch)
      end

      module LangchainLLMPatch
        def chat(...)
          wrap_with_sentry("chat") { super(...) }
        end

        def complete(...)
          wrap_with_sentry("complete") { super(...) }
        end

        def embed(...)
          wrap_with_sentry("embed") { super(...) }
        end

        def summarize(...)
          wrap_with_sentry("summarize") { super(...) }
        end

        private

        # Wraps a single LLM call in an ai.<call_type>.langchain child span
        # of the current transaction, recording request parameters, the
        # response, and estimated token usage.
        def wrap_with_sentry(call_type)
          transaction = Sentry.get_current_scope.get_transaction

          Sentry.capture_message("LangChain LLM #{call_type} call initiated for #{self.class.name}", level: 'info')

          if transaction
            span = transaction.start_child(
              op: "ai.#{call_type}.langchain",
              description: "LangChain LLM #{call_type.capitalize} Call for #{self.class.name}",
              origin: "auto.ai.langchain"
            )

            span.set_data("ai.model_id", "#{self.class.name}::#{@defaults[:chat_completion_model_name]}")

            # Add additional SPANDATA fields from the client's defaults
            span.set_data("ai.frequency_penalty", @defaults[:frequency_penalty])
            span.set_data("ai.presence_penalty", @defaults[:presence_penalty])
            span.set_data("ai.input_messages", @defaults[:messages])
            span.set_data("ai.metadata", @defaults[:metadata])
            span.set_data("ai.tags", @defaults[:tags])
            span.set_data("ai.streaming", @defaults[:stream])
            span.set_data("ai.temperature", @defaults[:temperature])
            span.set_data("ai.top_p", @defaults[:top_p])
            span.set_data("ai.top_k", @defaults[:top_k])
            span.set_data("ai.function_call", @defaults[:function_call])
            span.set_data("ai.tools", @defaults[:tools])
            span.set_data("ai.response_format", @defaults[:response_format])
            span.set_data("ai.logit_bias", @defaults[:logit_bias])
            span.set_data("ai.preamble", @defaults[:preamble])
            span.set_data("ai.raw_prompting", @defaults[:raw_prompting])
            span.set_data("ai.seed", @defaults[:seed])

            Sentry.capture_message("LLM span created for #{self.class.name}", level: 'info')

            begin
              result = yield
              response_text = result.respond_to?(:completion) ? result.completion : result.to_s
              span.set_data("ai.responses", [response_text])

              # Workaround: estimate token usage as characters / 4
              prompt_tokens = (@defaults[:messages].to_s.length / 4.0).ceil
              completion_tokens = (response_text.length / 4.0).ceil
              total_tokens = prompt_tokens + completion_tokens
              Sentry::AI::Monitoring.record_token_usage(
                transaction,
                prompt_tokens: prompt_tokens,
                completion_tokens: completion_tokens,
                total_tokens: total_tokens
              )

              Sentry.capture_message("LLM call completed successfully for #{self.class.name}", level: 'info')
              result
            rescue => e
              span.set_status("internal_error")
              Sentry.capture_exception(e, level: 'error')
              Sentry.capture_message("Error in LLM call for #{self.class.name}: #{e.message}", level: 'error')
              raise
            ensure
              span.finish
              Sentry.capture_message("LLM span finished for #{self.class.name}", level: 'info')
            end
          else
            Sentry.capture_message("No active transaction found for LLM call in #{self.class.name}", level: 'warning')
            yield
          end
        end
      end
    end
  end
end

Sentry.register_integration(:langchain, Sentry::VERSION)
Sentry.capture_message("Sentry LangChain integration registered", level: 'info')
@@ -0,0 +1,75 @@
# frozen_string_literal: true

module Sentry
  module AI
    module Monitoring
      DEFAULT_PIPELINE_NAME = "default_ai_pipeline"

      class << self
        # Returns a lambda meant to be evaluated in the context of the target
        # class with an UnboundMethod: it re-defines the method so each call
        # is wrapped in an ai.pipeline span (outermost call) or an ai.run
        # span (call nested inside a running pipeline).
        def ai_track(description, **span_kwargs)
          lambda do |original_method|
            define_method(original_method.name) do |*args, **kwargs, &block|
              transaction = Sentry.get_current_scope.get_transaction
              curr_pipeline = Monitoring.ai_pipeline_name
              op = span_kwargs[:op] || (curr_pipeline ? "ai.run" : "ai.pipeline")

              if transaction
                span = transaction.start_child(
                  op: op,
                  description: description,
                  origin: "auto.ai.monitoring",
                  **span_kwargs
                )

                kwargs[:sentry_tags]&.each { |k, v| span.set_tag(k, v) }
                kwargs[:sentry_data]&.each { |k, v| span.set_data(k, v) }

                span.set_data("ai.pipeline.name", curr_pipeline) if curr_pipeline

                begin
                  if curr_pipeline
                    result = original_method.bind(self).call(*args, **kwargs, &block)
                  else
                    # Outermost call: mark this invocation as the pipeline.
                    Monitoring.ai_pipeline_name = description
                    result = original_method.bind(self).call(*args, **kwargs, &block)
                  end
                rescue => e
                  Sentry.capture_exception(e)
                  raise
                ensure
                  Monitoring.ai_pipeline_name = nil unless curr_pipeline
                  span.finish
                end

                result
              else
                original_method.bind(self).call(*args, **kwargs, &block)
              end
            end
          end
        end

        def record_token_usage(span, prompt_tokens: nil, completion_tokens: nil, total_tokens: nil)
          ai_pipeline_name = Monitoring.ai_pipeline_name
          span.set_data("ai.pipeline.name", ai_pipeline_name) if ai_pipeline_name
          span.set_measurement("ai_prompt_tokens_used", value: prompt_tokens) if prompt_tokens
          span.set_measurement("ai_completion_tokens_used", value: completion_tokens) if completion_tokens

          if total_tokens.nil? && prompt_tokens && completion_tokens
            total_tokens = prompt_tokens + completion_tokens
          end

          span.set_measurement("ai_total_tokens_used", value: total_tokens) if total_tokens
        end

        # Reads the raw thread-local rather than defaulting it with
        # ||= DEFAULT_PIPELINE_NAME: with a default in place this getter
        # could never return nil, so the ai.pipeline branch in ai_track
        # above would be unreachable and every span would get op "ai.run".
        def ai_pipeline_name
          Thread.current[:sentry_ai_pipeline_name]
        end

        def ai_pipeline_name=(name)
          Thread.current[:sentry_ai_pipeline_name] = name
        end
      end
    end
  end
end
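The commit ships no usage example for ai_track. Because the returned lambda calls define_method, it has to run in the context of the class being instrumented; one way to arrange that is class_exec with an UnboundMethod. A sketch under that assumption (Agent and its run method are hypothetical):

# Hypothetical usage sketch — not part of the commit.
require "sentry/ai/monitoring"

class Agent
  def run(question)
    # ... call LLMs, tools, etc.
    "answer to #{question}"
  end

  # Re-define #run wrapped in a span. class_exec evaluates the lambda
  # with self == Agent, so define_method inside ai_track targets Agent,
  # and the wrapper reaches the original via original_method.bind(self).
  class_exec(instance_method(:run),
             &Sentry::AI::Monitoring.ai_track("my research pipeline"))
end

Agent.new.run("What is Sentry?") # opens an ai.pipeline span when a transaction is active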
@@ -0,0 +1,15 @@
# frozen_string_literal: true

require "sentry/ai/langchain"

module Sentry
  module Langchain
    def self.setup
      if defined?(::Langchain)
        Sentry::AI::Langchain.patch_langchain_llms
      end
    end
  end
end

Sentry::Langchain.setup
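End to end, the integration is driven by require order: langchainrb must be loaded before sentry/langchain, since both the require 'langchain' at the top of sentry/ai/langchain and the defined?(::Langchain) guard run at require time. A minimal boot-and-trace sketch (the DSN and API key are placeholders; assumes sentry-ruby with tracing enabled):

# Hypothetical boot + trace sketch — not part of the commit.
require "langchain"
require "sentry-ruby"
require "sentry/langchain" # patches all Langchain::LLM classes on load

Sentry.init do |config|
  config.dsn = ENV["SENTRY_DSN"]
  config.traces_sample_rate = 1.0
end

# wrap_with_sentry only creates spans when a transaction is on the scope.
transaction = Sentry.start_transaction(op: "ai.pipeline", name: "chat demo")
Sentry.get_current_scope.set_span(transaction)

llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
llm.chat(messages: [{ role: "user", content: "Hello!" }]) # emits an ai.chat.langchain child span

transaction.finish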