Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Super agent health checks #2994

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions lib/new_relic/agent/configuration/default_source.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2187,6 +2187,28 @@ def self.notify
:transform => DefaultSource.method(:convert_to_constant_list),
:description => 'Specify a list of exceptions you do not want the agent to strip when [strip_exception_messages](#strip_exception_messages-enabled) is `true`. Separate exceptions with a comma. For example, `"ImportantException,PreserveMessageException"`.'
},
# Super Agent
:'superagent.fleet_id' => {
:default => 'j2e4a6n0v1', # TODO: set default to nil before release
:public => true,
:type => String,
:allowed_from_server => false,
:description => 'This assigns a fleet id to the language agent. This id is generated by the super agent. If this setting is present, it indicates the agent is running in a super agent/fleet environment and health file(s) will be generated.'
},
:'superagent.health.delivery_location' => {
:default => 'health/', # TODO: set default to EMPTY_STRING before release
:public => true,
:type => String,
:allowed_from_server => false,
:description => 'A `file:` URI that specifies the fully qualified directory path for health file(s) to be written to. For example: `file:///var/lib/newrelic-super-agent/fleet/agents.d/<fleet_id>`. This configuration will be set by the super agent, or one of its components, prior to agent startup.'
},
:'superagent.health.frequency' => {
:default => 5,
:public => true,
:type => Integer,
:allowed_from_server => false,
:description => 'The interval, in seconds, of how often the health file(s) will be written to. This configuration will be set by the super agent, or one of its components, prior to agent startup.'
},
# Thread profiler
:'thread_profiler.enabled' => {
:default => DefaultSource.thread_profiler_enabled,
Expand Down
79 changes: 79 additions & 0 deletions lib/new_relic/agent/health_check.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# This file is distributed under New Relic's license terms.
# See https://github.com/newrelic/newrelic-ruby-agent/blob/main/LICENSE for complete details.
# frozen_string_literal: true

module NewRelic
  module Agent
    # Writes a small, YAML-style health file for this agent instance into the
    # directory named by the `superagent.health.delivery_location` setting.
    #
    # The health, status and last_error values are still placeholders (see the
    # TODOs below). The file name is generated once per instance, so repeated
    # calls to #write_file overwrite the same file.
    class HealthCheck
      # Scheme prefix removed from a `file:` URI delivery location.
      FILE_URI_PREFIX = %r{\Afile://}.freeze

      # Captures the start timestamp and reads the super agent settings from
      # the agent configuration.
      def initialize
        # should we pass this in as an arg from the init_plugin method call?
        @start_time = nano_time
        # if they're configs, is it worth saving them in vars?
        @fleet_id = NewRelic::Agent.config[:'superagent.fleet_id']
        @delivery_location = NewRelic::Agent.config[:'superagent.health.delivery_location']
        @frequency = NewRelic::Agent.config[:'superagent.health.frequency']
        # @check? = false
      end

      # nope out if no delivery_location?
      # seems like something for init_plugin
      def validate_delivery_location
      end

      # TODO: check health
      # NOTE(review): the sample file shown in the test suite uses the key
      # `healthy`; confirm which key name the consumer expects.
      #
      # @return [String] the health line of the file
      def health
        'health: true'
      end

      # TODO: get valid status
      #
      # @return [String] the status line of the file
      def status
        'status: Agent has shutdown'
      end

      # TODO: get actual last error
      #
      # @return [String] the last_error line of the file
      def last_error
        'last_error: NR-APM-1000'
      end

      # @return [String] the start time line; the timestamp is the one taken
      #   in #initialize and is identical for every write
      def start_time
        "start_time_unix_nano: #{@start_time}"
      end

      # @return [String] the status time line; the timestamp is taken at call time
      def status_time
        "status_time_unix_nano: #{nano_time}"
      end

      # @return [Integer] the current wall-clock time in nanoseconds
      def nano_time
        Process.clock_gettime(Process::CLOCK_REALTIME, :nanosecond)
      end

      # The name is memoized so that every write from this instance targets the
      # same file rather than leaving a new file behind on each call.
      #
      # @return [String] "health-<guid>.yml"
      def file_name
        @file_name ||= "health-#{NewRelic::Agent::GuidGenerator.generate_guid(32)}.yml"
      end

      # Writes (or overwrites) the health file in the delivery directory.
      #
      # @return [Integer, nil] the number of bytes written, or nil when no
      #   delivery directory could be found or created
      def write_file
        @path ||= find_or_create_file_path

        # Without a usable directory the target would be interpolated from nil
        # and point at the filesystem root, so bail out instead.
        unless @path
          NewRelic::Agent.logger.error(
            "Unable to find or create health file delivery location '#{@delivery_location}', health file not written"
          )
          return
        end

        File.write(File.join(@path, file_name), contents) # add .to_yaml?
      end

      # @return [String] the file body, one "key: value" line per field
      def contents
        [health, status, last_error, status_time, start_time].join("\n")
      end

      # Adapted from AgentLogger
      #
      # Returns the first candidate directory that already exists or that
      # could be created.
      #
      # @return [String, nil] an absolute directory path, or nil when the
      #   delivery location is blank or no candidate is usable
      def find_or_create_file_path
        candidate_paths.find do |abs_path|
          File.directory?(abs_path) || create_directory(abs_path)
        end
      end

      private

      # Absolute directories to try, in order: the location resolved against
      # the working directory, then the location resolved against the root.
      # A leading `file://` is dropped so a `file:` URI is read as a path.
      def candidate_paths
        location = @delivery_location.to_s.sub(FILE_URI_PREFIX, '')
        return [] if location.empty?

        [File.expand_path(location), File.expand_path(File.join('', location))]
      end

      # Tries to create +path+; reports failure instead of raising so the next
      # candidate can be tried.
      def create_directory(path)
        Dir.mkdir(path)
        true
      rescue SystemCallError
        false
      end
    end
  end
end
11 changes: 10 additions & 1 deletion lib/new_relic/control/instance_methods.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
require 'new_relic/agent/null_logger'
require 'new_relic/agent/memory_logger'
require 'new_relic/agent/agent_logger'
require 'new_relic/agent/health_check'

require_relative 'private_instance_methods'

Expand Down Expand Up @@ -53,7 +54,7 @@ def init_plugin(options = {})
env = determine_env(options)

configure_agent(env, options)

#health_check
# Be sure to only create once! RUBY-1020
create_logger(options)

Expand Down Expand Up @@ -153,6 +154,14 @@ def newrelic_root
self.class.newrelic_root
end

# Decides whether super agent health checks should run for this process.
#
# Health checks are skipped, with a debug log entry, when either
# `superagent.fleet_id` or `superagent.health.delivery_location` is unset.
# nil and the empty string both count as unset, because an empty String is
# truthy in Ruby and would otherwise pass a plain truthiness guard.
#
# @return [Object, nil] the logger's return value when skipped, otherwise nil
def health_check
  config = NewRelic::Agent.config

  if config[:'superagent.fleet_id'].to_s.empty?
    return NewRelic::Agent.logger.debug('superagent.fleet_id not found, skipping health checks')
  end

  if config[:'superagent.health.delivery_location'].to_s.empty?
    return NewRelic::Agent.logger.debug('superagent.health.delivery_location not found, skipping health checks')
  end

  # NewRelic::Agent::HealthCheck.new
  # start the loop here?
  nil
end

protected

def initialize(local_env, config_file_override = nil)
Expand Down
146 changes: 146 additions & 0 deletions test/new_relic/agent/health_check_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# This file is distributed under New Relic's license terms.
# See https://github.com/newrelic/newrelic-ruby-agent/blob/main/LICENSE for complete details.
# frozen_string_literal: true

require 'fileutils'
require_relative '../../test_helper'

# Tests for NewRelic::Agent::HealthCheck. Tests with an empty body are
# placeholders for behavior that has not been implemented yet.
class NewRelicHealthCheckTest < Minitest::Test
  # example:
  # health-bc21b5891f5e44fc9272caef924611a8.yml
  # healthy: false
  # status: Agent has shutdown
  # last_error: NR-APM-1000
  # status_time_unix_nano: 1724953624761000000
  # start_time_unix_nano: 1724953587605000000

  def test_yaml_health_file_written_to_delivery_location
    with_written_health_file do
      assert File.directory?('health'), 'Directory not found'
      assert File.exist?('health/health-abc123.yml'), 'File not found'
    end
  end

  # This might be on init...
  def test_yaml_health_file_logs_error_when_delivery_location_invalid
  end

  def test_yaml_file_generated_if_superagent_fleet_id_present
  end

  def test_yaml_file_not_generated_if_superagent_fleet_id_absent
  end

  def test_yaml_file_name_has_health_plus_uuid_without_hyphens
    health_check = NewRelic::Agent::HealthCheck.new

    # ex: health-bc21b5891f5e44fc9272caef924611a8.yml
    # Anchored, and limited to exactly 32 hex characters, so a hyphenated or
    # wrongly sized id fails the match.
    assert_match(/\Ahealth-\h{32}\.ya?ml\z/, health_check.file_name)
  end

  def test_yaml_health_file_written_on_interval
    with_config(:'superagent.health.frequency' => 5) do
    end
  end

  def test_agent_logs_errors_if_yaml_health_file_writing_fails
  end

  def test_yaml_file_has_health_field
    assert_health_file_has_field(/health:/)
  end

  def test_yaml_file_has_status_field
    assert_health_file_has_field(/status:/)
  end

  def test_yaml_file_has_last_error_field_when_status_not_healthy
    assert_health_file_has_field(/last_error:/)
  end

  def test_yaml_file_does_not_have_last_error_field_when_status_healthy
  end

  def test_yaml_file_has_start_time_unix_nano
    # TODO - validate timestamp
    # TODO - validate timestamp same for every file created by that instance
    assert_health_file_has_field(/start_time_unix_nano:/)
  end

  def test_yaml_file_has_status_time_unix_nano
    # status_time_unix_nano:
    # timestamp present
    # timestamp in nanoseconds => milliseconds * 1000000
    assert_health_file_has_field(/status_time_unix_nano:/)
  end

  def test_yaml_file_fully_regenerated_on_each_interval
  end

  def test_unique_health_file_exists_per_process
    # puma scenario?
  end

  def test_supportability_metric_generated_at_agent_startup
    # Supportability/SuperAgent/Health/enabled
  end

  ## ADD MORE TESTS FOR ERROR CODE BEHAVIOR

  private

  # Writes a health file into 'health/' with the guid stubbed to 'abc123',
  # yields so the caller can make assertions, then always removes the
  # directory so tests do not leak files into each other.
  def with_written_health_file
    with_config(:'superagent.health.delivery_location' => 'health/') do
      NewRelic::Agent::GuidGenerator.stub(:generate_guid, 'abc123') do
        health_check = NewRelic::Agent::HealthCheck.new
        health_check.write_file

        yield
      end
    end
  ensure
    FileUtils.rm_rf('health')
  end

  # Asserts that at least one line of the written health file matches +pattern+.
  def assert_health_file_has_field(pattern)
    with_written_health_file do
      assert File.readlines('health/health-abc123.yml').grep(pattern).any?
    end
  end
end
Loading