diff --git a/lib/datadog/di.rb b/lib/datadog/di.rb index 700fbce100f..221c32864f6 100644 --- a/lib/datadog/di.rb +++ b/lib/datadog/di.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require_relative 'di/base' require_relative 'di/error' require_relative 'di/code_tracker' require_relative 'di/component' @@ -46,67 +47,7 @@ def enabled? # Expose DI to global shared objects Extensions.activate! - LOCK = Mutex.new - class << self - attr_reader :code_tracker - - # Activates code tracking. Normally this method should be called - # when the application starts. If instrumenting third-party code, - # code tracking needs to be enabled before the third-party libraries - # are loaded. If you definitely will not be instrumenting - # third-party libraries, activating tracking after third-party libraries - # have been loaded may improve lookup performance. - # - # TODO test that activating tracker multiple times preserves - # existing mappings in the registry - def activate_tracking! - (@code_tracker ||= CodeTracker.new).start - end - - # Activates code tracking if possible. - # - # This method does nothing if invoked in an environment that does not - # implement required trace points for code tracking (MRI Ruby < 2.6, - # JRuby) and rescues any exceptions that may be raised by downstream - # DI code. - def activate_tracking - # :script_compiled trace point was added in Ruby 2.6. - return unless RUBY_VERSION >= '2.6' - - begin - # Activate code tracking by default because line trace points will not work - # without it. - Datadog::DI.activate_tracking! - rescue => exc - if defined?(Datadog.logger) - Datadog.logger.warn("Failed to activate code tracking for DI: #{exc.class}: #{exc}") - else - # We do not have Datadog logger potentially because DI code tracker is - # being loaded early in application boot process and the rest of datadog - # wasn't loaded yet. Output to standard error. - warn("Failed to activate code tracking for DI: #{exc.class}: #{exc}") - end - end - end - - # Deactivates code tracking. In normal usage of DI this method should - # never be called, however it is used by DI's test suite to reset - # state for individual tests. - # - # Note that deactivating tracking clears out the registry, losing - # the ability to look up files that have been loaded into the process - # already. - def deactivate_tracking! - code_tracker&.stop - end - - # Returns whether code tracking is available. - # This method should be used instead of querying #code_tracker - # because the latter one may be nil. - def code_tracking_active? - code_tracker&.active? || false - end # This method is called from DI Remote handler to issue DI operations # to the probe manager (add or remove probes). @@ -120,39 +61,6 @@ def code_tracking_active? def component Datadog.send(:components).dynamic_instrumentation end - - # DI code tracker is instantiated globally before the regular set of - # components is created, but the code tracker needs to call out to the - # "current" DI component to perform instrumentation when application - # code is loaded. Because this call may happen prior to Datadog - # components having been initialized, we maintain the "current component" - # which contains a reference to the most recently instantiated - # DI::Component. This way, if a DI component hasn't been instantiated, - # we do not try to reference Datadog.components. - def current_component - LOCK.synchronize do - @current_components&.last - end - end - - # To avoid potential races with DI::Component being added and removed, - # we maintain a list of the components. Normally the list should contain - # either zero or one component depending on whether DI is enabled in - # Datadog configuration. However, if a new instance of DI::Component - # is created while the previous instance is still running, we are - # guaranteed to not end up with no component when one is running. - def add_current_component(component) - LOCK.synchronize do - @current_components ||= [] - @current_components << component - end - end - - def remove_current_component(component) - LOCK.synchronize do - @current_components&.delete(component) - end - end end end end diff --git a/lib/datadog/di/base.rb b/lib/datadog/di/base.rb new file mode 100644 index 00000000000..afe8c38e602 --- /dev/null +++ b/lib/datadog/di/base.rb @@ -0,0 +1,115 @@ +# frozen_string_literal: true + +# This file is loaded by datadog/di/init.rb. +# It contains just the global DI reference to the (normally one and only) +# code tracker for the current process. +# This file should not require the rest of DI, specifically none of the +# contrib code that is meant to be loaded after third-party libraries +# are loaded, and also none of the rest of datadog library which also +# has contrib code in other products. + +require_relative 'code_tracker' + +module Datadog + # Namespace for Datadog dynamic instrumentation. + # + # @api private + module DI + LOCK = Mutex.new + + class << self + attr_reader :code_tracker + + # Activates code tracking. Normally this method should be called + # when the application starts. If instrumenting third-party code, + # code tracking needs to be enabled before the third-party libraries + # are loaded. Any third-party code loaded before code tracking is + # activated will NOT be instrumentable using dynamic instrumentation. + # + # TODO test that activating tracker multiple times preserves + # existing mappings in the registry + def activate_tracking! + (@code_tracker ||= CodeTracker.new).start + end + + # Activates code tracking if possible. + # + # This method does nothing if invoked in an environment that does not + # implement required trace points for code tracking (MRI Ruby < 2.6, + # JRuby) and rescues any exceptions that may be raised by downstream + # DI code. + def activate_tracking + # :script_compiled trace point was added in Ruby 2.6. + return unless RUBY_VERSION >= '2.6' + + begin + # Activate code tracking by default because line trace points will not work + # without it. + Datadog::DI.activate_tracking! + rescue => exc + if defined?(Datadog.logger) + Datadog.logger.warn { "di: Failed to activate code tracking for DI: #{exc.class}: #{exc}" } + else + # We do not have Datadog logger potentially because DI code tracker is + # being loaded early in application boot process and the rest of datadog + # wasn't loaded yet. Output to standard error. + warn("datadog: di: Failed to activate code tracking for DI: #{exc.class}: #{exc}") + end + end + end + + # Deactivates code tracking. In normal usage of DI this method should + # never be called, however it is used by DI's test suite to reset + # state for individual tests. + # + # Note that deactivating tracking clears out the registry, losing + # the ability to look up files that have been loaded into the process + # already. + def deactivate_tracking! + code_tracker&.stop + end + + # Returns whether code tracking is available. + # This method should be used instead of querying #code_tracker + # because the latter one may be nil. + def code_tracking_active? + code_tracker&.active? || false + end + + # DI code tracker is instantiated globally before the regular set of + # components is created, but the code tracker needs to call out to the + # "current" DI component to perform instrumentation when application + # code is loaded. Because this call may happen prior to Datadog + # components having been initialized, we maintain the "current component" + # which contains a reference to the most recently instantiated + # DI::Component. This way, if a DI component hasn't been instantiated, + # we do not try to reference Datadog.components. + # In other words, this method exists so that we never attempt to call + # Datadog.components from the code tracker. + def current_component + LOCK.synchronize do + @current_components&.last + end + end + + # To avoid potential races with DI::Component being added and removed, + # we maintain a list of the components. Normally the list should contain + # either zero or one component depending on whether DI is enabled in + # Datadog configuration. However, if a new instance of DI::Component + # is created while the previous instance is still running, we are + # guaranteed to not end up with no component when one is running. + def add_current_component(component) + LOCK.synchronize do + @current_components ||= [] + @current_components << component + end + end + + def remove_current_component(component) + LOCK.synchronize do + @current_components&.delete(component) + end + end + end + end +end diff --git a/lib/datadog/di/code_tracker.rb b/lib/datadog/di/code_tracker.rb index 023f7ea4fdd..15878deb9a3 100644 --- a/lib/datadog/di/code_tracker.rb +++ b/lib/datadog/di/code_tracker.rb @@ -2,6 +2,8 @@ # rubocop:disable Lint/AssignmentInCondition +require_relative 'error' + module Datadog module DI # Tracks loaded Ruby code by source file and maintains a map from @@ -87,9 +89,10 @@ def start # rescue any exceptions that might not be handled to not break said # customer applications. rescue => exc - # TODO we do not have DI.component defined yet, remove steep:ignore - # before release. - if component = DI.current_component # steep:ignore + # Code tracker may be loaded without the rest of DI, + # in which case DI.component will not yet be defined, + # but we will have DI.current_component (set to nil). + if component = DI.current_component raise if component.settings.dynamic_instrumentation.internal.propagate_all_exceptions component.logger.debug { "di: unhandled exception in script_compiled trace point: #{exc.class}: #{exc}" } component.telemetry&.report(exc, description: "Unhandled exception in script_compiled trace point") diff --git a/lib/datadog/di/init.rb b/lib/datadog/di/init.rb index 0b6a9217c2d..2af4c29c325 100644 --- a/lib/datadog/di/init.rb +++ b/lib/datadog/di/init.rb @@ -4,7 +4,7 @@ # enable dynamic instrumentation for third-party libraries used by the # application. -require_relative '../di' +require_relative 'base' # Code tracking is required for line probes to work; see the comments # on the activate_tracking methods in di.rb for further details. diff --git a/sig/datadog/di.rbs b/sig/datadog/di.rbs index a8c7f3a4a97..375da13af21 100644 --- a/sig/datadog/di.rbs +++ b/sig/datadog/di.rbs @@ -1,21 +1,6 @@ module Datadog module DI - def self.code_tracker: () -> CodeTracker? - def self.component: () -> Component? - def self.current_component: () -> Component? - - def self.add_current_component: (Component) -> void - - def self.remove_current_component: (Component) -> void - - def self.activate_tracking: () -> void - - def self.activate_tracking!: () -> void - - def self.deactivate_tracking!: () -> void - - LOCK: Mutex end end diff --git a/sig/datadog/di/base.rbs b/sig/datadog/di/base.rbs new file mode 100644 index 00000000000..82d6d74e5dd --- /dev/null +++ b/sig/datadog/di/base.rbs @@ -0,0 +1,23 @@ +module Datadog + module DI + self.@code_tracker: CodeTracker? + + attr_reader self.code_tracker: CodeTracker? + + def self.activate_tracking: () -> void + + def self.activate_tracking!: () -> void + + def self.deactivate_tracking!: () -> void + + def self.code_tracking_active?: () -> bool + + def self.current_component: () -> Component? + + def self.add_current_component: (Component) -> void + + def self.remove_current_component: (Component) -> void + + LOCK: Mutex + end +end diff --git a/spec/datadog/core/environment/execution_spec.rb b/spec/datadog/core/environment/execution_spec.rb index ba709c90601..0cda18a9b00 100644 --- a/spec/datadog/core/environment/execution_spec.rb +++ b/spec/datadog/core/environment/execution_spec.rb @@ -59,10 +59,11 @@ context 'when in an IRB session' do it 'returns true' do # Ruby 2.6 does not have irb by default in a bundle, but has it outside of it. - _, err, = Bundler.with_unbundled_env do + _, err, status = Bundler.with_unbundled_env do Open3.capture3('irb', '--noprompt', '--noverbose', '--noecho', stdin_data: repl_script) end expect(err).to end_with('ACTUAL:true') + expect(status.exitstatus).to eq(0) end end @@ -203,11 +204,12 @@ def test_it_does_something_useful # Add our script to `env.rb`, which is always run before any feature is executed. File.write('features/support/env.rb', repl_script) - _, err, = Bundler.with_unbundled_env do + _, err, status = Bundler.with_unbundled_env do Open3.capture3('ruby', stdin_data: script) end expect(err).to include('ACTUAL:true') + expect(status.exitstatus).to eq(0) end end end @@ -270,7 +272,7 @@ def test_it_does_something_useful context 'when given WebMock', skip: Gem::Version.new(Bundler::VERSION) < Gem::Version.new('2') do it do - out, = Bundler.with_unbundled_env do + out, _err, status = Bundler.with_unbundled_env do Open3.capture3('ruby', stdin_data: <<-RUBY require 'bundler/inline' @@ -292,6 +294,7 @@ def test_it_does_something_useful end expect(out).to end_with('ACTUAL:true') + expect(status.exitstatus).to eq(0) end end end diff --git a/spec/datadog/di/init_spec.rb b/spec/datadog/di/init_spec.rb new file mode 100644 index 00000000000..45a5f650fc4 --- /dev/null +++ b/spec/datadog/di/init_spec.rb @@ -0,0 +1,75 @@ +require "datadog/di/spec_helper" +require 'open3' + +RSpec.describe 'DI initializer' do + di_test + + # rubocop:disable Lint/ConstantDefinitionInBlock + BOOTSTRAP_SCRIPT = <<-SCRIPT + if defined?(Datadog) && Datadog.constants != %i(VERSION) + raise "Datadog code loaded too early" + end + + require 'datadog/di/init' + + if Datadog.constants.sort != %i(DI VERSION) + raise "Too many datadog components loaded: \#{Datadog.constants}" + end + + unless Datadog::DI.code_tracker + raise "Code tracker not instantiated" + end + + unless Datadog::DI.code_tracker.send(:registry).empty? + raise "Code tracker registry is not empty" + end + + # Test load something + require 'open3' + + if Datadog::DI.code_tracker.send(:registry).empty? + raise "Code tracker did not add loaded file to registry" + end + + unless Datadog::DI.code_tracker.send(:registry).detect { |key, value| key =~ /open3.rb\\z/ } + raise "Loaded script not found in code tracker registry" + end + + if Datadog.constants.sort != %i(DI VERSION) + raise "Too many datadog components loaded at the end of execution: \#{Datadog.constants}" + end + SCRIPT + # rubocop:enable Lint/ConstantDefinitionInBlock + + context 'when loaded initially into a clean process' do + it 'loads only DI code tracker' do + out, status = Open3.capture2e('ruby', stdin_data: BOOTSTRAP_SCRIPT) + unless status.exitstatus == 0 + fail("Test script failed with exit status #{status.exitstatus}:\n#{out}") + end + end + end + + context 'when entire library is loaded after di bootstrapper' do + it 'keeps the mappings in code tracker prior to datadog load' do + script = <<-SCRIPT + #{BOOTSTRAP_SCRIPT} + + require 'datadog' + + # Should still have the open3 entry in code tracker + unless Datadog::DI.code_tracker.send(:registry).detect { |key, value| key =~ /open3.rb\\z/ } + raise "Loaded script not found in code tracker registry" + end + + unless defined?(Datadog::Tracing) + raise "Expected Datadog::Tracing to be defined after datadog was loaded" + end + SCRIPT + out, status = Open3.capture2e('ruby', stdin_data: script) + unless status.exitstatus == 0 + fail("Test script failed with exit status #{status.exitstatus}:\n#{out}") + end + end + end +end