Skip to content

Commit

Permalink
DEBUG-3210 DI: change logging to be appropriate for customer inspecti…
Browse files Browse the repository at this point in the history
…on (#4266)
  • Loading branch information
p-datadog authored Jan 8, 2025
1 parent e3ea38f commit 2e98fc4
Show file tree
Hide file tree
Showing 12 changed files with 53 additions and 61 deletions.
2 changes: 1 addition & 1 deletion lib/datadog/di/code_tracker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def start
# before release.
if component = DI.current_component # steep:ignore
raise if component.settings.dynamic_instrumentation.internal.propagate_all_exceptions
component.logger.warn("Unhandled exception in script_compiled trace point: #{exc.class}: #{exc}")
component.logger.debug { "di: unhandled exception in script_compiled trace point: #{exc.class}: #{exc}" }
component.telemetry&.report(exc, description: "Unhandled exception in script_compiled trace point")
# TODO test this path
else
Expand Down
12 changes: 8 additions & 4 deletions lib/datadog/di/component.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def build(settings, agent_settings, logger, telemetry: nil)
return unless settings.respond_to?(:dynamic_instrumentation) && settings.dynamic_instrumentation.enabled

unless settings.respond_to?(:remote) && settings.remote.enabled
logger.debug("Dynamic Instrumentation could not be enabled because Remote Configuration Management is not available. To enable Remote Configuration, see https://docs.datadoghq.com/agent/remote_config")
logger.warn("di: dynamic instrumentation could not be enabled because Remote Configuration Management is not available. To enable Remote Configuration, see https://docs.datadoghq.com/agent/remote_config")
return
end

Expand Down Expand Up @@ -55,12 +55,16 @@ def environment_supported?(settings, logger)
# TODO add tests?
unless settings.dynamic_instrumentation.internal.development
if Datadog::Core::Environment::Execution.development?
logger.debug("Not enabling dynamic instrumentation because we are in development environment")
logger.warn("di: development environment detected; not enabling dynamic instrumentation")
return false
end
end
if RUBY_ENGINE != 'ruby' || RUBY_VERSION < '2.6'
logger.debug("Not enabling dynamic instrumentation because of unsupported Ruby version")
if RUBY_ENGINE != 'ruby'
logger.warn("di: cannot enable dynamic instrumentation: MRI is required, but running on #{RUBY_ENGINE}")
return false
end
if RUBY_VERSION < '2.6'
logger.warn("di: cannot enable dynamic instrumentation: Ruby 2.6+ is required, but running on #{RUBY_VERSION}")
return false
end
true
Expand Down
8 changes: 4 additions & 4 deletions lib/datadog/di/instrumenter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -304,13 +304,13 @@ def hook_line(probe, &block)
end
rescue => exc
raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
logger.warn("Unhandled exception in line trace point: #{exc.class}: #{exc}")
logger.debug { "di: unhandled exception in line trace point: #{exc.class}: #{exc}" }
telemetry&.report(exc, description: "Unhandled exception in line trace point")
# TODO test this path
end
rescue => exc
raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
logger.warn("Unhandled exception in line trace point: #{exc.class}: #{exc}")
logger.debug { "di: unhandled exception in line trace point: #{exc.class}: #{exc}" }
telemetry&.report(exc, description: "Unhandled exception in line trace point")
# TODO test this path
end
Expand Down Expand Up @@ -356,7 +356,7 @@ def hook(probe, &block)
hook_line(probe, &block)
else
# TODO add test coverage for this path
logger.warn("Unknown probe type to hook: #{probe}")
logger.debug { "di: unknown probe type to hook: #{probe}" }
end
end

Expand All @@ -367,7 +367,7 @@ def unhook(probe)
unhook_line(probe)
else
# TODO add test coverage for this path
logger.warn("Unknown probe type to unhook: #{probe}")
logger.debug { "di: unknown probe type to unhook: #{probe}" }
end
end

Expand Down
8 changes: 4 additions & 4 deletions lib/datadog/di/probe_manager.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def initialize(settings, instrumenter, probe_notification_builder,
install_pending_method_probes(tp.self)
rescue => exc
raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
logger.warn("Unhandled exception in definition trace point: #{exc.class}: #{exc}")
logger.debug { "di: unhandled exception in definition trace point: #{exc.class}: #{exc}" }
telemetry&.report(exc, description: "Unhandled exception in definition trace point")
# TODO test this path
end
Expand Down Expand Up @@ -120,7 +120,7 @@ def add_probe(probe)
# In "propagate all exceptions" mode we will try to instrument again.
raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions

logger.warn("Error processing probe configuration: #{exc.class}: #{exc}")
logger.debug { "di: error processing probe configuration: #{exc.class}: #{exc}" }
telemetry&.report(exc, description: "Error processing probe configuration")
# TODO report probe as failed to agent since we won't attempt to
# install it again.
Expand Down Expand Up @@ -160,7 +160,7 @@ def remove_other_probes(probe_ids)
raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
# Silence all exceptions?
# TODO should we propagate here and rescue upstream?
logger.warn("Error removing probe #{probe.id}: #{exc.class}: #{exc}")
logger.debug { "di: error removing probe #{probe.id}: #{exc.class}: #{exc}" }
telemetry&.report(exc, description: "Error removing probe")
end
end
Expand Down Expand Up @@ -190,7 +190,7 @@ def remove_other_probes(probe_ids)
rescue => exc
raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions

logger.warn("Error installing probe after class is defined: #{exc.class}: #{exc}")
logger.debug { "di: error installing probe after class is defined: #{exc.class}: #{exc}" }
telemetry&.report(exc, description: "Error installing probe after class is defined")
end
end
Expand Down
8 changes: 4 additions & 4 deletions lib/datadog/di/probe_notifier_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def start
rescue => exc
raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions

logger.warn("Error in probe notifier worker: #{exc.class}: #{exc} (at #{exc.backtrace.first})")
logger.debug { "di: error in probe notifier worker: #{exc.class}: #{exc} (at #{exc.backtrace.first})" }
telemetry&.report(exc, description: "Error in probe notifier worker")
end
@lock.synchronize do
Expand Down Expand Up @@ -184,7 +184,7 @@ def io_in_progress?
@lock.synchronize do
queue = send("#{event_type}_queue")
if queue.length > settings.dynamic_instrumentation.internal.snapshot_queue_capacity
logger.warn("#{self.class.name}: dropping #{event_type} because queue is full")
logger.debug { "di: #{self.class.name}: dropping #{event_type} because queue is full" }
else
queue << event
end
Expand Down Expand Up @@ -241,7 +241,7 @@ def set_sleep_remaining
end
rescue => exc
raise if settings.dynamic_instrumentation.internal.propagate_all_exceptions
logger.warn("failed to send #{event_name}: #{exc.class}: #{exc} (at #{exc.backtrace.first})")
logger.debug { "di: failed to send #{event_name}: #{exc.class}: #{exc} (at #{exc.backtrace.first})" }
# Should we report this error to telemetry? Most likely failure
# to send is due to a network issue, and trying to send a
# telemetry message would also fail.
Expand All @@ -252,7 +252,7 @@ def set_sleep_remaining
# Normally the queue should only be consumed in this method,
# however if anyone consumes it elsewhere we don't want to block
# while consuming it here. Rescue ThreadError and return.
logger.warn("Unexpected #{event_name} queue underflow - consumed elsewhere?")
logger.debug { "di: unexpected #{event_name} queue underflow - consumed elsewhere?" }
telemetry&.report(exc, description: "Unexpected #{event_name} queue underflow")
ensure
@lock.synchronize do
Expand Down
8 changes: 4 additions & 4 deletions lib/datadog/di/remote.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def receivers(telemetry)
probe = ProbeBuilder.build_from_remote_config(probe_spec)
payload = component.probe_notification_builder.build_received(probe)
component.probe_notifier_worker.add_status(payload)
component.logger.info("Received probe from RC: #{probe.type} #{probe.location}")
component.logger.debug { "di: received probe from RC: #{probe.type} #{probe.location}" }

begin
# TODO test exception capture
Expand All @@ -60,7 +60,7 @@ def receivers(telemetry)
rescue => exc
raise if component.settings.dynamic_instrumentation.internal.propagate_all_exceptions

component.logger.warn("Unhandled exception adding probe in DI remote receiver: #{exc.class}: #{exc}")
component.logger.debug { "di: unhandled exception adding probe in DI remote receiver: #{exc.class}: #{exc}" }
component.telemetry&.report(exc, description: "Unhandled exception adding probe in DI remote receiver")

# If a probe fails to install, we will mark the content
Expand All @@ -81,7 +81,7 @@ def receivers(telemetry)
rescue => exc
raise if component.settings.dynamic_instrumentation.internal.propagate_all_exceptions

component.logger.warn("Unhandled exception handling probe in DI remote receiver: #{exc.class}: #{exc}")
component.logger.debug { "di: unhandled exception handling probe in DI remote receiver: #{exc.class}: #{exc}" }
component.telemetry&.report(exc, description: "Unhandled exception handling probe in DI remote receiver")

content.errored("Error applying dynamic instrumentation configuration: #{exc.class.name} #{exc.message}: #{Array(exc.backtrace).join("\n")}")
Expand All @@ -95,7 +95,7 @@ def receivers(telemetry)
rescue => exc
raise if component.settings.dynamic_instrumentation.internal.propagate_all_exceptions

component.logger.warn("Unhandled exception removing probes in DI remote receiver: #{exc.class}: #{exc}")
component.logger.debug { "di: unhandled exception removing probes in DI remote receiver: #{exc.class}: #{exc}" }
component.telemetry&.report(exc, description: "Unhandled exception removing probes in DI remote receiver")
end
end
Expand Down
2 changes: 1 addition & 1 deletion spec/datadog/di/component_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
end

it 'returns nil' do
expect(logger).to receive(:debug).with(/Dynamic Instrumentation could not be enabled because Remote Configuration Management is not available/)
expect(logger).to receive(:warn).with(/dynamic instrumentation could not be enabled because Remote Configuration Management is not available/)
component = described_class.build(settings, agent_settings, logger)
expect(component).to be nil
end
Expand Down
2 changes: 1 addition & 1 deletion spec/datadog/di/instrumenter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -784,7 +784,7 @@
# Needed for the cleanup unhook call.
allow(probe).to receive(:method?).and_return(false)
allow(probe).to receive(:line?).and_return(false)
allow(logger).to receive(:warn)
allow(logger).to receive(:debug)
end

it 'raises ArgumentError' do
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def do_rc
end

it 'adds a probe to pending list' do
expect(logger).to receive(:info).with(/Received probe from RC:/)
expect_lazy_log(logger, :debug, /received probe from RC:/)

do_rc

Expand Down Expand Up @@ -272,7 +272,7 @@ def assert_received_and_installed
end

it 'instruments code and adds probe to installed list' do
expect(logger).to receive(:info).with(/Received probe from RC:/)
expect_lazy_log(logger, :debug, /received probe from RC:/)

do_rc
assert_received_and_installed
Expand All @@ -282,7 +282,7 @@ def assert_received_and_installed

context 'and target method is invoked' do
it 'notifies about execution' do
expect(logger).to receive(:info).with(/Received probe from RC:/)
expect_lazy_log(logger, :debug, /received probe from RC:/)

do_rc
assert_received_and_installed
Expand Down Expand Up @@ -325,8 +325,8 @@ def assert_received_and_installed
end

it 'installs the second, known, probe' do
expect(logger).to receive(:warn).with(/Unrecognized probe type:/)
expect(logger).to receive(:info).with(/Received probe from RC:/)
expect_lazy_log(logger, :debug, /Unrecognized probe type:/)
expect_lazy_log(logger, :debug, /received probe from RC:/)

do_rc
assert_received_and_installed
Expand Down
12 changes: 3 additions & 9 deletions spec/datadog/di/probe_manager_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,7 @@ class ProbeManagerSpecTestClass; end

context 'when there is an exception during instrumentation' do
it 'logs warning, drops probe and reraises the exception' do
expect(logger).to receive(:warn) do |msg|
expect(msg).to match(/Error processing probe configuration.*Instrumentation error/)
end
expect_lazy_log(logger, :debug, /error processing probe configuration.*Instrumentation error/)

expect(instrumenter).to receive(:hook) do |probe_|
expect(probe_).to be(probe)
Expand Down Expand Up @@ -180,9 +178,7 @@ class ProbeManagerSpecTestClass; end
it 'logs warning and keeps probe in installed list' do
expect(instrumenter).to receive(:unhook).with(probe).and_raise("Deinstrumentation error")

expect(logger).to receive(:warn) do |msg|
expect(msg).to match(/Error removing probe.*Deinstrumentation error/)
end
expect_lazy_log(logger, :debug, /error removing probe.*Deinstrumentation error/)

manager.remove_other_probes(['123'])

Expand All @@ -207,9 +203,7 @@ class ProbeManagerSpecTestClass; end
expect(instrumenter).to receive(:unhook).with(probe).and_raise("Deinstrumentation error")
expect(instrumenter).to receive(:unhook).with(probe2)

expect(logger).to receive(:warn) do |msg|
expect(msg).to match(/Error removing probe.*Deinstrumentation error/)
end
expect_lazy_log(logger, :debug, /error removing probe.*Deinstrumentation error/)

manager.remove_other_probes(['123'])

Expand Down
32 changes: 8 additions & 24 deletions spec/datadog/di/remote_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,7 @@

it 'calls probe manager to add a probe' do
expect(component).to receive(:logger).and_return(logger)
expect(logger).to receive(:info) do |message|
expect(message).to match(/Received probe/)
end
expect_lazy_log(logger, :debug, /received probe/)

expect(probe_manager).to receive(:add_probe) do |probe|
expect(probe.id).to eq('11')
Expand All @@ -165,13 +163,9 @@
it 'logs warning and consumes the exception' do
expect(component).to receive(:telemetry).and_return(telemetry)
expect(component).to receive(:logger).and_return(logger)
expect(logger).to receive(:info) do |message|
expect(message).to match(/Received probe/)
end
expect_lazy_log(logger, :debug, /received probe/)

expect(logger).to receive(:warn) do |msg|
expect(msg).to match(/Unhandled exception.*Runtime error from test/)
end
expect_lazy_log(logger, :debug, /unhandled exception.*Runtime error from test/)
expect(component).to receive(:logger).and_return(logger)
expect(telemetry).to receive(:report)

Expand All @@ -189,13 +183,9 @@
it 'calls probe manager to remove stale probes' do
allow(component).to receive(:telemetry)
expect(component).to receive(:logger).and_return(logger)
expect(logger).to receive(:info) do |message|
expect(message).to match(/Received probe/)
end
expect_lazy_log(logger, :debug, /received probe/)

expect(logger).to receive(:warn) do |msg|
expect(msg).to match(/Unhandled exception.*Runtime error from test/)
end
expect_lazy_log(logger, :debug, /unhandled exception.*Runtime error from test/)

allow(probe_manager).to receive(:add_probe).and_raise("Runtime error from test")
expect(component).to receive(:logger).and_return(logger)
Expand All @@ -212,13 +202,9 @@
it 'logs warning and consumes the exception' do
expect(component).to receive(:telemetry).and_return(telemetry).at_least(:once)
expect(component).to receive(:logger).and_return(logger)
expect(logger).to receive(:info) do |message|
expect(message).to match(/Received probe/)
end
expect_lazy_log(logger, :debug, /received probe/)

expect(logger).to receive(:warn) do |msg|
expect(msg).to match(/Unhandled exception.*Runtime error 1 from test/)
end
expect_lazy_log(logger, :debug, /unhandled exception.*Runtime error 1 from test/)
expect(telemetry).to receive(:report)

allow(probe_manager).to receive(:add_probe).and_raise("Runtime error 1 from test")
Expand All @@ -228,9 +214,7 @@
expect(component).to receive(:probe_notifier_worker).and_return(probe_notifier_worker)
expect(probe_notifier_worker).to receive(:add_status)

expect(logger).to receive(:warn) do |msg|
expect(msg).to match(/Unhandled exception.*Runtime error 2 from test/)
end
expect_lazy_log(logger, :debug, /unhandled exception.*Runtime error 2 from test/)
expect(component).to receive(:logger).and_return(logger)
expect(telemetry).to receive(:report)

Expand Down
10 changes: 10 additions & 0 deletions spec/datadog/di/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,16 @@ def deep_stringify_keys(hash)
def instance_double_agent_settings
instance_double(Datadog::Core::Configuration::AgentSettingsResolver::AgentSettings)
end

def expect_lazy_log(logger, meth, expected_msg)
expect(logger).to receive(meth) do |&block|
if expected_msg.is_a?(String)
expect(block.call).to eq(expected_msg)
else
expect(block.call).to match(expected_msg)
end
end
end
end
end

Expand Down

0 comments on commit 2e98fc4

Please sign in to comment.