Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enh(agent): new agent check_health (#1944) #1970

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/scripts/agent_installer_test.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

# This script tests the CMA installer in silent mode

Set-PSDebug -Trace 2
#Set-PSDebug -Trace 2

function f_start_process([string]$sProcess, [string]$sArgs, [ref]$pOutPut) {
<#
Expand Down
1 change: 1 addition & 0 deletions agent/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ set( SRC_COMMON
${SRC_DIR}/check.cc
${SRC_DIR}/check_exec.cc
${SRC_DIR}/drive_size.cc
${SRC_DIR}/check_health.cc
${SRC_DIR}/opentelemetry/proto/collector/metrics/v1/metrics_service.grpc.pb.cc
${SRC_DIR}/opentelemetry/proto/collector/metrics/v1/metrics_service.pb.cc
${SRC_DIR}/opentelemetry/proto/metrics/v1/metrics.pb.cc
Expand Down
6 changes: 5 additions & 1 deletion agent/doc/agent-doc.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,4 +123,8 @@ So it works like that:
* check_drive_size post query in drive_size_thread queue
* drive_size_thread call os_fs_stats
* drive_size_thread post result in io_context
* io_context calls check_drive_size::_completion_handler
* io_context calls check_drive_size::_completion_handler

### check_health
This small check sends the agent's statistics to the poller. To do so, all checks share a common checks_statistics object.
This object is created by the scheduler each time the agent receives a configuration from the poller. It contains the last check interval and the last check duration of each command. The first time check_health is executed, it may return an unknown state if no other check has been executed yet.
51 changes: 50 additions & 1 deletion agent/inc/com/centreon/agent/check.hh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,44 @@ using engine_to_agent_request_ptr =
using time_point = std::chrono::system_clock::time_point;
using duration = std::chrono::system_clock::duration;

class checks_statistics {
struct check_stat {
std::string cmd_name;
duration last_check_interval;
duration last_check_duration;
};

using statistic_container = multi_index::multi_index_container<
check_stat,
multi_index::indexed_by<
multi_index::hashed_unique<
BOOST_MULTI_INDEX_MEMBER(check_stat, std::string, cmd_name)>,
boost::multi_index::ordered_non_unique<BOOST_MULTI_INDEX_MEMBER(
check_stat,
duration,
last_check_interval)>,
boost::multi_index::ordered_non_unique<BOOST_MULTI_INDEX_MEMBER(
check_stat,
duration,
last_check_duration)>>>;

statistic_container _stats;

public:
using pointer = std::shared_ptr<checks_statistics>;

void add_interval_stat(const std::string& cmd_name,
const duration& check_interval);

void add_duration_stat(const std::string& cmd_name,
const duration& check_interval);

const auto& get_ordered_by_interval() const { return _stats.get<1>(); }
const auto& get_ordered_by_duration() const { return _stats.get<2>(); }

size_t size() const { return _stats.size(); }
};

/**
* @brief nagios status values
*
Expand Down Expand Up @@ -90,6 +128,8 @@ class time_step {
time_point value() const { return _start_point + _step_index * _step; }

uint64_t get_step_index() const { return _step_index; }

duration get_step() const { return _step; }
};

/**
Expand Down Expand Up @@ -130,6 +170,10 @@ class check : public std::enable_shared_from_this<check> {
unsigned _running_check_index = 0;
completion_handler _completion_handler;

// statistics used by check_health
time_point _last_start;
checks_statistics::pointer _stat;

protected:
std::shared_ptr<asio::io_context> _io_context;
std::shared_ptr<spdlog::logger> _logger;
Expand Down Expand Up @@ -159,7 +203,8 @@ class check : public std::enable_shared_from_this<check> {
const std::string& command_name,
const std::string& cmd_line,
const engine_to_agent_request_ptr& cnf,
completion_handler&& handler);
completion_handler&& handler,
const checks_statistics::pointer& stat);

virtual ~check() = default;

Expand All @@ -178,6 +223,8 @@ class check : public std::enable_shared_from_this<check> {

time_point get_start_expected() const { return _start_expected.value(); }

const time_step & get_raw_start_expected() const { return _start_expected; }

const std::string& get_service() const { return _service; }

const std::string& get_command_name() const { return _command_name; }
Expand All @@ -201,6 +248,8 @@ class check : public std::enable_shared_from_this<check> {
static std::optional<bool> get_bool(const std::string& cmd_name,
const char* field_name,
const rapidjson::Value& val);

const checks_statistics& get_stats() const { return *_stat; }
};

} // namespace com::centreon::agent
Expand Down
6 changes: 4 additions & 2 deletions agent/inc/com/centreon/agent/check_exec.hh
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ class check_exec : public check {
const std::string& cmd_name,
const std::string& cmd_line,
const engine_to_agent_request_ptr& cnf,
check::completion_handler&& handler);
check::completion_handler&& handler,
const checks_statistics::pointer& stat);

static std::shared_ptr<check_exec> load(
const std::shared_ptr<asio::io_context>& io_context,
Expand All @@ -108,7 +109,8 @@ class check_exec : public check {
const std::string& cmd_name,
const std::string& cmd_line,
const engine_to_agent_request_ptr& cnf,
check::completion_handler&& handler);
check::completion_handler&& handler,
const checks_statistics::pointer& stat);

void start_check(const duration& timeout) override;

Expand Down
63 changes: 63 additions & 0 deletions agent/inc/com/centreon/agent/check_health.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/**
* Copyright 2024 Centreon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information : [email protected]
*/

#ifndef CENTREON_AGENT_HEALTH_CHECK_HH
#define CENTREON_AGENT_HEALTH_CHECK_HH

#include "check.hh"

namespace com::centreon::agent {

/**
 * @brief check that reports the agent's own check statistics
 *
 * It derives from check and receives the shared checks_statistics object at
 * construction.
 * NOTE(review): the threshold members below are presumably filled from the
 * args JSON value passed to the constructor - confirm in check_health.cc.
 */
class check_health : public check {
// warning/critical thresholds applied to check intervals and check durations
// NOTE(review): units are not visible in this header - confirm in
// check_health.cc
unsigned _warning_check_interval;
unsigned _critical_check_interval;
unsigned _warning_check_duration;
unsigned _critical_check_duration;

// NOTE(review): looks like an informational text added to the check output -
// confirm in check_health.cc
std::string _info_output;

// we use this timer to delay measure in order to have some checks yet done
// when we will compute the first statistics
asio::system_timer _measure_timer;

// completion handler of _measure_timer
// err is the timer completion status, start_check_index is supplied by the
// code that arms the timer (presumably the index of the check run that armed
// it - confirm in check_health.cc)
void _measure_timer_handler(const boost::system::error_code& err,
unsigned start_check_index);

public:
// same parameters as the other native checks: scheduling information,
// service and command identification, JSON arguments, engine request,
// completion handler and the shared statistics object
check_health(const std::shared_ptr<asio::io_context>& io_context,
const std::shared_ptr<spdlog::logger>& logger,
time_point first_start_expected,
duration check_interval,
const std::string& serv,
const std::string& cmd_name,
const std::string& cmd_line,
const rapidjson::Value& args,
const engine_to_agent_request_ptr& cnf,
check::completion_handler&& handler,
const checks_statistics::pointer& stat);

// writes the help of this check to help_stream
static void help(std::ostream& help_stream);

// entry point called to start one execution of the check (overrides check)
void start_check(const duration& timeout) override;

// returns a status and receives two output pointers: output (text) and perfs
// (perfdata list)
// NOTE(review): presumably both are filled from the shared statistics -
// confirm in check_health.cc
e_status compute(std::string* output, std::list<common::perfdata>* perfs);
};

} // namespace com::centreon::agent

#endif // CENTREON_AGENT_HEALTH_CHECK_HH
28 changes: 28 additions & 0 deletions agent/inc/com/centreon/agent/config.hh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#ifndef CENTREON_AGENT_CONFIG_HH
#define CENTREON_AGENT_CONFIG_HH

#include <memory>
#include "com/centreon/common/grpc/grpc_config.hh"

namespace com::centreon::agent {
Expand Down Expand Up @@ -45,9 +46,36 @@ class config {
bool _reverse_connection;
unsigned _second_max_reconnect_backoff;

static std::unique_ptr<config> _global_conf;

public:
/**
 * @brief build a configuration from path and install it as the global one
 *
 * A previously installed global configuration is replaced.
 *
 * @param path forwarded to the config(const std::string&) constructor
 * @return const config& the newly installed global configuration
 */
static const config& load(const std::string& path) {
  _global_conf = std::make_unique<config>(path);
  const config& installed = *_global_conf;
  return installed;
}

/**
 * @brief used only for UT: install a global configuration built from the
 * reverse_connection flag alone
 *
 * A previously installed global configuration is replaced.
 *
 * @param reverse_connection forwarded to the config(bool) constructor
 * @return const config& the newly installed global configuration
 */
static const config& load(bool reverse_connection) {
  _global_conf = std::make_unique<config>(reverse_connection);
  const config& installed = *_global_conf;
  return installed;
}

// Returns the global configuration stored in _global_conf.
// _global_conf is dereferenced without a null check, so one of the load()
// overloads must have been called before the first call to instance().
static const config& instance() { return *_global_conf; }

config(const std::string& path);

/**
 * @brief used only for UT
 *
 * Only _reverse_connection is set by this constructor.
 * NOTE(review): the other members appear to be left default-initialized -
 * confirm before reading them from a config built this way.
 *
 * @param reverse_connection value stored in _reverse_connection
 */
config(bool reverse_connection) : _reverse_connection(reverse_connection) {}

const std::string& get_endpoint() const { return _endpoint; }
// Returns the log level stored in this configuration.
spdlog::level::level_enum get_log_level() const { return _log_level; }
log_type get_log_type() const { return _log_type; }
Expand Down
3 changes: 2 additions & 1 deletion agent/inc/com/centreon/agent/drive_size.hh
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,8 @@ class check_drive_size : public check {
const std::string& cmd_line,
const rapidjson::Value& args,
const engine_to_agent_request_ptr& cnf,
check::completion_handler&& handler);
check::completion_handler&& handler,
const checks_statistics::pointer& stat);

virtual ~check_drive_size() = default;

Expand Down
3 changes: 2 additions & 1 deletion agent/inc/com/centreon/agent/native_check_base.hh
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ class native_check_base : public check {
const std::string& cmd_line,
const rapidjson::Value& args,
const engine_to_agent_request_ptr& cnf,
check::completion_handler&& handler);
check::completion_handler&& handler,
const checks_statistics::pointer& stat);

std::shared_ptr<native_check_base<nb_metric>> shared_from_this() {
return std::static_pointer_cast<native_check_base<nb_metric>>(
Expand Down
3 changes: 2 additions & 1 deletion agent/inc/com/centreon/agent/native_check_cpu_base.hh
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,8 @@ class native_check_cpu : public check {
const std::string& cmd_line,
const rapidjson::Value& args,
const engine_to_agent_request_ptr& cnf,
check::completion_handler&& handler);
check::completion_handler&& handler,
const checks_statistics::pointer& stat);

virtual ~native_check_cpu() = default;

Expand Down
10 changes: 6 additions & 4 deletions agent/inc/com/centreon/agent/scheduler.hh
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ class scheduler : public std::enable_shared_from_this<scheduler> {
const std::string& /*cmd_name*/,
const std::string& /*cmd_line*/,
const engine_to_agent_request_ptr& /*engine to agent request*/,
check::completion_handler&&)>;
check::completion_handler&&,
const checks_statistics::pointer& /*stat*/)>;

private:
using check_queue =
Expand Down Expand Up @@ -164,7 +165,8 @@ class scheduler : public std::enable_shared_from_this<scheduler> {
const std::string& cmd_name,
const std::string& cmd_line,
const engine_to_agent_request_ptr& conf,
check::completion_handler&& handler);
check::completion_handler&& handler,
const checks_statistics::pointer& stat);

engine_to_agent_request_ptr get_last_message_to_agent() const {
return _conf;
Expand All @@ -187,10 +189,10 @@ scheduler::scheduler(
const std::shared_ptr<com::centreon::agent::MessageToAgent>& config,
sender&& met_sender,
chck_builder&& builder)
: _metric_sender(met_sender),
_io_context(io_context),
: _io_context(io_context),
_logger(logger),
_supervised_host(supervised_host),
_metric_sender(met_sender),
_send_timer(*io_context),
_check_timer(*io_context),
_check_builder(builder),
Expand Down
2 changes: 2 additions & 0 deletions agent/inc/com/centreon/agent/version.hh.in
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,6 @@ constexpr unsigned CENTREON_AGENT_VERSION_MAJOR = @COLLECT_MAJOR@;
constexpr unsigned CENTREON_AGENT_VERSION_MINOR = @[email protected];
constexpr unsigned CENTREON_AGENT_VERSION_PATCH = @[email protected];

#define CENTREON_AGENT_VERSION "@COLLECT_MAJOR@.@COLLECT_MINOR@.@COLLECT_PATCH@"

#endif // !CCE_VERSION_HH
3 changes: 2 additions & 1 deletion agent/native_linux/inc/com/centreon/agent/check_cpu.hh
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ class check_cpu
const std::string& cmd_line,
const rapidjson::Value& args,
const engine_to_agent_request_ptr& cnf,
check::completion_handler&& handler);
check::completion_handler&& handler,
const checks_statistics::pointer& stat);

static void help(std::ostream& help_stream);

Expand Down
6 changes: 4 additions & 2 deletions agent/native_linux/src/check_cpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,8 @@ check_cpu::check_cpu(const std::shared_ptr<asio::io_context>& io_context,
const std::string& cmd_line,
const rapidjson::Value& args,
const engine_to_agent_request_ptr& cnf,
check::completion_handler&& handler)
check::completion_handler&& handler,
const checks_statistics::pointer& stat)
: native_check_cpu<check_cpu_detail::e_proc_stat_index::nb_field>(
io_context,
logger,
Expand All @@ -194,7 +195,8 @@ check_cpu::check_cpu(const std::shared_ptr<asio::io_context>& io_context,
cmd_line,
args,
cnf,
std::move(handler))
std::move(handler),
stat)

{
com::centreon::common::rapidjson_helper arg(args);
Expand Down
3 changes: 2 additions & 1 deletion agent/native_windows/inc/com/centreon/agent/check_cpu.hh
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,8 @@ class check_cpu
const std::string& cmd_line,
const rapidjson::Value& args,
const engine_to_agent_request_ptr& cnf,
check::completion_handler&& handler);
check::completion_handler&& handler,
const checks_statistics::pointer& stat);

~check_cpu();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ class check_memory : public native_check_base<
const std::string& cmd_line,
const rapidjson::Value& args,
const engine_to_agent_request_ptr& cnf,
check::completion_handler&& handler);
check::completion_handler&& handler,
const checks_statistics::pointer& stat);

std::shared_ptr<native_check_detail::snapshot<
native_check_detail::e_memory_metric::nb_metric>>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,8 @@ class check_service
const std::string& cmd_line,
const rapidjson::Value& args,
const engine_to_agent_request_ptr& cnf,
check::completion_handler&& handler);
check::completion_handler&& handler,
const checks_statistics::pointer& stat);

std::shared_ptr<native_check_detail::snapshot<
native_check_detail::e_service_metric::nb_service_metric>>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ class check_uptime : public check {
const std::string& cmd_line,
const rapidjson::Value& args,
const engine_to_agent_request_ptr& cnf,
check::completion_handler&& handler);
check::completion_handler&& handler,
const checks_statistics::pointer& stat);

static void help(std::ostream& help_stream);

Expand Down
Loading
Loading