diff --git a/.github/scripts/agent_installer_test.ps1 b/.github/scripts/agent_installer_test.ps1 index 814b455b60..e631c681e5 100644 --- a/.github/scripts/agent_installer_test.ps1 +++ b/.github/scripts/agent_installer_test.ps1 @@ -18,7 +18,7 @@ # This script test CMA installer in silent mode -Set-PSDebug -Trace 2 +#Set-PSDebug -Trace 2 function f_start_process([string]$sProcess, [string]$sArgs, [ref]$pOutPut) { <# diff --git a/agent/CMakeLists.txt b/agent/CMakeLists.txt index ddb71e5461..fcb871ef42 100644 --- a/agent/CMakeLists.txt +++ b/agent/CMakeLists.txt @@ -114,6 +114,7 @@ set( SRC_COMMON ${SRC_DIR}/check.cc ${SRC_DIR}/check_exec.cc ${SRC_DIR}/drive_size.cc + ${SRC_DIR}/check_health.cc ${SRC_DIR}/opentelemetry/proto/collector/metrics/v1/metrics_service.grpc.pb.cc ${SRC_DIR}/opentelemetry/proto/collector/metrics/v1/metrics_service.pb.cc ${SRC_DIR}/opentelemetry/proto/metrics/v1/metrics.pb.cc diff --git a/agent/doc/agent-doc.md b/agent/doc/agent-doc.md index 7d051131b1..062b962f3f 100644 --- a/agent/doc/agent-doc.md +++ b/agent/doc/agent-doc.md @@ -123,4 +123,8 @@ So it works like that: * check_drive_size post query in drive_size_thread queue * drive_size_thread call os_fs_stats * drive_size_thread post result in io_context -* io_context calls check_drive_size::_completion_handler \ No newline at end of file +* io_context calls check_drive_size::_completion_handler + +### check_health +This little check sends agent's statistics to the poller. In order to do that, each check shares a common checks_statistics object. +This object is created by scheduler each time agent receives config from poller. This object contains last check interval and last check duration of each command. The first time it's executed, it can send unknown state if there is no other yet executed checks. \ No newline at end of file diff --git a/agent/inc/com/centreon/agent/check.hh b/agent/inc/com/centreon/agent/check.hh index e10da69bbc..e4c1511b7e 100644 --- a/agent/inc/com/centreon/agent/check.hh +++ b/agent/inc/com/centreon/agent/check.hh @@ -30,6 +30,44 @@ using engine_to_agent_request_ptr = using time_point = std::chrono::system_clock::time_point; using duration = std::chrono::system_clock::duration; +class checks_statistics { + struct check_stat { + std::string cmd_name; + duration last_check_interval; + duration last_check_duration; + }; + + using statistic_container = multi_index::multi_index_container< + check_stat, + multi_index::indexed_by< + multi_index::hashed_unique< + BOOST_MULTI_INDEX_MEMBER(check_stat, std::string, cmd_name)>, + boost::multi_index::ordered_non_unique, + boost::multi_index::ordered_non_unique>>; + + statistic_container _stats; + + public: + using pointer = std::shared_ptr; + + void add_interval_stat(const std::string& cmd_name, + const duration& check_interval); + + void add_duration_stat(const std::string& cmd_name, + const duration& check_interval); + + const auto& get_ordered_by_interval() const { return _stats.get<1>(); } + const auto& get_ordered_by_duration() const { return _stats.get<2>(); } + + size_t size() const { return _stats.size(); } +}; + /** * @brief nagios status values * @@ -90,6 +128,8 @@ class time_step { time_point value() const { return _start_point + _step_index * _step; } uint64_t get_step_index() const { return _step_index; } + + duration get_step() const { return _step; } }; /** @@ -130,6 +170,10 @@ class check : public std::enable_shared_from_this { unsigned _running_check_index = 0; completion_handler _completion_handler; + // statistics used by check_health + time_point _last_start; + checks_statistics::pointer _stat; + protected: std::shared_ptr _io_context; std::shared_ptr _logger; @@ -159,7 +203,8 @@ class check : public std::enable_shared_from_this { const std::string& command_name, const std::string& cmd_line, const engine_to_agent_request_ptr& cnf, - completion_handler&& handler); + completion_handler&& handler, + const checks_statistics::pointer& stat); virtual ~check() = default; @@ -178,6 +223,8 @@ class check : public std::enable_shared_from_this { time_point get_start_expected() const { return _start_expected.value(); } + const time_step & get_raw_start_expected() const { return _start_expected; } + const std::string& get_service() const { return _service; } const std::string& get_command_name() const { return _command_name; } @@ -201,6 +248,8 @@ class check : public std::enable_shared_from_this { static std::optional get_bool(const std::string& cmd_name, const char* field_name, const rapidjson::Value& val); + + const checks_statistics& get_stats() const { return *_stat; } }; } // namespace com::centreon::agent diff --git a/agent/inc/com/centreon/agent/check_exec.hh b/agent/inc/com/centreon/agent/check_exec.hh index 49cdc2c04d..37b932c1d6 100644 --- a/agent/inc/com/centreon/agent/check_exec.hh +++ b/agent/inc/com/centreon/agent/check_exec.hh @@ -97,7 +97,8 @@ class check_exec : public check { const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); static std::shared_ptr load( const std::shared_ptr& io_context, @@ -108,7 +109,8 @@ class check_exec : public check { const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); void start_check(const duration& timeout) override; diff --git a/agent/inc/com/centreon/agent/check_health.hh b/agent/inc/com/centreon/agent/check_health.hh new file mode 100644 index 0000000000..d62dafd339 --- /dev/null +++ b/agent/inc/com/centreon/agent/check_health.hh @@ -0,0 +1,63 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For more information : contact@centreon.com + */ + +#ifndef CENTREON_AGENT_HEALTH_CHECK_HH +#define CENTREON_AGENT_HEALTH_CHECK_HH + +#include "check.hh" + +namespace com::centreon::agent { + +class check_health : public check { + unsigned _warning_check_interval; + unsigned _critical_check_interval; + unsigned _warning_check_duration; + unsigned _critical_check_duration; + + std::string _info_output; + + // we use this timer to delay measure in order to have some checks yet done + // when we will compute the first statistics + asio::system_timer _measure_timer; + + void _measure_timer_handler(const boost::system::error_code& err, + unsigned start_check_index); + + public: + check_health(const std::shared_ptr& io_context, + const std::shared_ptr& logger, + time_point first_start_expected, + duration check_interval, + const std::string& serv, + const std::string& cmd_name, + const std::string& cmd_line, + const rapidjson::Value& args, + const engine_to_agent_request_ptr& cnf, + check::completion_handler&& handler, + const checks_statistics::pointer& stat); + + static void help(std::ostream& help_stream); + + void start_check(const duration& timeout) override; + + e_status compute(std::string* output, std::list* perfs); +}; + +} // namespace com::centreon::agent + +#endif // CENTREON_AGENT_HEALTH_CHECK_HH diff --git a/agent/inc/com/centreon/agent/config.hh b/agent/inc/com/centreon/agent/config.hh index 6808041f31..0a7669ccfb 100644 --- a/agent/inc/com/centreon/agent/config.hh +++ b/agent/inc/com/centreon/agent/config.hh @@ -18,6 +18,7 @@ #ifndef CENTREON_AGENT_CONFIG_HH #define CENTREON_AGENT_CONFIG_HH +#include #include "com/centreon/common/grpc/grpc_config.hh" namespace com::centreon::agent { @@ -45,9 +46,36 @@ class config { bool _reverse_connection; unsigned _second_max_reconnect_backoff; + static std::unique_ptr _global_conf; + public: + static const config& load(const std::string& path) { + _global_conf = std::make_unique(path); + return *_global_conf; + } + + /** + * @brief used only for UT + * + * @param reverse_connection + * @return const config& + */ + static const config& load(bool reverse_connection) { + _global_conf = std::make_unique(reverse_connection); + return *_global_conf; + } + + static const config& instance() { return *_global_conf; } + config(const std::string& path); + /** + * @brief used only for UT + * + * @param reverse_connection + */ + config(bool reverse_connection) : _reverse_connection(reverse_connection) {} + const std::string& get_endpoint() const { return _endpoint; } spdlog::level::level_enum get_log_level() const { return _log_level; }; log_type get_log_type() const { return _log_type; } diff --git a/agent/inc/com/centreon/agent/drive_size.hh b/agent/inc/com/centreon/agent/drive_size.hh index 8b33cf10c7..94c4d3d460 100644 --- a/agent/inc/com/centreon/agent/drive_size.hh +++ b/agent/inc/com/centreon/agent/drive_size.hh @@ -255,7 +255,8 @@ class check_drive_size : public check { const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); virtual ~check_drive_size() = default; diff --git a/agent/inc/com/centreon/agent/native_check_base.hh b/agent/inc/com/centreon/agent/native_check_base.hh index a5c1d48ab8..158cad781d 100644 --- a/agent/inc/com/centreon/agent/native_check_base.hh +++ b/agent/inc/com/centreon/agent/native_check_base.hh @@ -135,7 +135,8 @@ class native_check_base : public check { const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); std::shared_ptr> shared_from_this() { return std::static_pointer_cast>( diff --git a/agent/inc/com/centreon/agent/native_check_cpu_base.hh b/agent/inc/com/centreon/agent/native_check_cpu_base.hh index 32131bb30d..0460cbf008 100644 --- a/agent/inc/com/centreon/agent/native_check_cpu_base.hh +++ b/agent/inc/com/centreon/agent/native_check_cpu_base.hh @@ -221,7 +221,8 @@ class native_check_cpu : public check { const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); virtual ~native_check_cpu() = default; diff --git a/agent/inc/com/centreon/agent/scheduler.hh b/agent/inc/com/centreon/agent/scheduler.hh index 6e18e47358..623b31bb61 100644 --- a/agent/inc/com/centreon/agent/scheduler.hh +++ b/agent/inc/com/centreon/agent/scheduler.hh @@ -42,7 +42,8 @@ class scheduler : public std::enable_shared_from_this { const std::string& /*cmd_name*/, const std::string& /*cmd_line*/, const engine_to_agent_request_ptr& /*engine to agent request*/, - check::completion_handler&&)>; + check::completion_handler&&, + const checks_statistics::pointer& /*stat*/)>; private: using check_queue = @@ -164,7 +165,8 @@ class scheduler : public std::enable_shared_from_this { const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& conf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); engine_to_agent_request_ptr get_last_message_to_agent() const { return _conf; @@ -187,10 +189,10 @@ scheduler::scheduler( const std::shared_ptr& config, sender&& met_sender, chck_builder&& builder) - : _metric_sender(met_sender), - _io_context(io_context), + : _io_context(io_context), _logger(logger), _supervised_host(supervised_host), + _metric_sender(met_sender), _send_timer(*io_context), _check_timer(*io_context), _check_builder(builder), diff --git a/agent/inc/com/centreon/agent/version.hh.in b/agent/inc/com/centreon/agent/version.hh.in index f4c2d2e013..205199267d 100644 --- a/agent/inc/com/centreon/agent/version.hh.in +++ b/agent/inc/com/centreon/agent/version.hh.in @@ -25,4 +25,6 @@ constexpr unsigned CENTREON_AGENT_VERSION_MAJOR = @COLLECT_MAJOR@; constexpr unsigned CENTREON_AGENT_VERSION_MINOR = @COLLECT_MINOR@.0; constexpr unsigned CENTREON_AGENT_VERSION_PATCH = @COLLECT_PATCH@.0; +#define CENTREON_AGENT_VERSION "@COLLECT_MAJOR@.@COLLECT_MINOR@.@COLLECT_PATCH@" + #endif // !CCE_VERSION_HH diff --git a/agent/native_linux/inc/com/centreon/agent/check_cpu.hh b/agent/native_linux/inc/com/centreon/agent/check_cpu.hh index 34750e2eda..9481f61fa0 100644 --- a/agent/native_linux/inc/com/centreon/agent/check_cpu.hh +++ b/agent/native_linux/inc/com/centreon/agent/check_cpu.hh @@ -86,7 +86,8 @@ class check_cpu const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); static void help(std::ostream& help_stream); diff --git a/agent/native_linux/src/check_cpu.cc b/agent/native_linux/src/check_cpu.cc index 8de1a02575..1959d1acd2 100644 --- a/agent/native_linux/src/check_cpu.cc +++ b/agent/native_linux/src/check_cpu.cc @@ -183,7 +183,8 @@ check_cpu::check_cpu(const std::shared_ptr& io_context, const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : native_check_cpu( io_context, logger, @@ -194,7 +195,8 @@ check_cpu::check_cpu(const std::shared_ptr& io_context, cmd_line, args, cnf, - std::move(handler)) + std::move(handler), + stat) { com::centreon::common::rapidjson_helper arg(args); diff --git a/agent/native_windows/inc/com/centreon/agent/check_cpu.hh b/agent/native_windows/inc/com/centreon/agent/check_cpu.hh index 806a6cfca7..f1d8421293 100644 --- a/agent/native_windows/inc/com/centreon/agent/check_cpu.hh +++ b/agent/native_windows/inc/com/centreon/agent/check_cpu.hh @@ -132,7 +132,8 @@ class check_cpu const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); ~check_cpu(); diff --git a/agent/native_windows/inc/com/centreon/agent/check_memory.hh b/agent/native_windows/inc/com/centreon/agent/check_memory.hh index f5b9c6aaab..76a11b928a 100644 --- a/agent/native_windows/inc/com/centreon/agent/check_memory.hh +++ b/agent/native_windows/inc/com/centreon/agent/check_memory.hh @@ -82,7 +82,8 @@ class check_memory : public native_check_base< const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); std::shared_ptr> diff --git a/agent/native_windows/inc/com/centreon/agent/check_service.hh b/agent/native_windows/inc/com/centreon/agent/check_service.hh index 4625ab0e90..d35de66a8f 100644 --- a/agent/native_windows/inc/com/centreon/agent/check_service.hh +++ b/agent/native_windows/inc/com/centreon/agent/check_service.hh @@ -177,7 +177,8 @@ class check_service const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); std::shared_ptr> diff --git a/agent/native_windows/inc/com/centreon/agent/check_uptime.hh b/agent/native_windows/inc/com/centreon/agent/check_uptime.hh index 93748c7301..3a43d32f1c 100644 --- a/agent/native_windows/inc/com/centreon/agent/check_uptime.hh +++ b/agent/native_windows/inc/com/centreon/agent/check_uptime.hh @@ -41,7 +41,8 @@ class check_uptime : public check { const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler); + check::completion_handler&& handler, + const checks_statistics::pointer& stat); static void help(std::ostream& help_stream); diff --git a/agent/native_windows/src/check_cpu.cc b/agent/native_windows/src/check_cpu.cc index b3dd6d60ab..96ccc641ae 100644 --- a/agent/native_windows/src/check_cpu.cc +++ b/agent/native_windows/src/check_cpu.cc @@ -394,7 +394,8 @@ check_cpu::check_cpu(const std::shared_ptr& io_context, const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : native_check_cpu( io_context, logger, @@ -405,7 +406,8 @@ check_cpu::check_cpu(const std::shared_ptr& io_context, cmd_line, args, cnf, - std::move(handler)) + std::move(handler), + stat) { try { diff --git a/agent/native_windows/src/check_memory.cc b/agent/native_windows/src/check_memory.cc index 3f46188d43..ec94ba4c4d 100644 --- a/agent/native_windows/src/check_memory.cc +++ b/agent/native_windows/src/check_memory.cc @@ -376,7 +376,8 @@ check_memory::check_memory(const std::shared_ptr& io_context, const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : native_check_base(io_context, logger, first_start_expected, @@ -386,7 +387,8 @@ check_memory::check_memory(const std::shared_ptr& io_context, cmd_line, args, cnf, - std::move(handler)) { + std::move(handler), + stat) { _no_percent_unit = "B"; if (args.IsObject()) { for (auto member_iter = args.MemberBegin(); member_iter != args.MemberEnd(); diff --git a/agent/native_windows/src/check_service.cc b/agent/native_windows/src/check_service.cc index 931bd49c25..f62679ab65 100644 --- a/agent/native_windows/src/check_service.cc +++ b/agent/native_windows/src/check_service.cc @@ -531,7 +531,8 @@ check_service::check_service( const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : native_check_base(io_context, logger, first_start_expected, @@ -541,7 +542,8 @@ check_service::check_service( cmd_line, args, cnf, - std::move(handler)), + std::move(handler), + stat), _filter(args), _enumerator(_enumerator_constructor()) { if (!args.IsObject()) { diff --git a/agent/native_windows/src/check_uptime.cc b/agent/native_windows/src/check_uptime.cc index 6aa7dd83e5..31256ac3e2 100644 --- a/agent/native_windows/src/check_uptime.cc +++ b/agent/native_windows/src/check_uptime.cc @@ -53,7 +53,8 @@ check_uptime::check_uptime(const std::shared_ptr& io_context, const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : check(io_context, logger, first_start_expected, @@ -62,7 +63,8 @@ check_uptime::check_uptime(const std::shared_ptr& io_context, cmd_name, cmd_line, cnf, - std::move(handler)), + std::move(handler), + stat), _second_warning_threshold(0), _second_critical_threshold(0) { com::centreon::common::rapidjson_helper arg(args); @@ -91,6 +93,9 @@ check_uptime::check_uptime(const std::shared_ptr& io_context, * @param timeout unused */ void check_uptime::start_check([[maybe_unused]] const duration& timeout) { + if (!_start_check(timeout)) { + return; + } std::string output; common::perfdata perf; e_status status = compute(GetTickCount64(), &output, &perf); diff --git a/agent/precomp_inc/precomp.hh b/agent/precomp_inc/precomp.hh index df066d6fe5..e53dd163dc 100644 --- a/agent/precomp_inc/precomp.hh +++ b/agent/precomp_inc/precomp.hh @@ -50,9 +50,17 @@ namespace asio = boost::asio; #include #include +#include +#include +#include +#include #include #include #include +#include "com/centreon/exceptions/msg_fmt.hh" + +namespace multi_index = boost::multi_index; + #endif diff --git a/agent/src/bireactor.cc b/agent/src/bireactor.cc index e26346be55..712d81c903 100644 --- a/agent/src/bireactor.cc +++ b/agent/src/bireactor.cc @@ -42,11 +42,11 @@ bireactor::bireactor( const std::string_view& class_name, const std::string& peer) : _write_pending(false), - _alive(true), _class_name(class_name), _peer(peer), _io_context(io_context), - _logger(logger) { + _logger(logger), + _alive(true) { SPDLOG_LOGGER_DEBUG(_logger, "create {} this={:p} peer:{}", _class_name, static_cast(this), _peer); } @@ -204,4 +204,4 @@ template class bireactor< template class bireactor< ::grpc::ServerBidiReactor>; -} // namespace com::centreon::agent \ No newline at end of file +} // namespace com::centreon::agent diff --git a/agent/src/check.cc b/agent/src/check.cc index fa6c28da6e..a730ad4a4c 100644 --- a/agent/src/check.cc +++ b/agent/src/check.cc @@ -16,12 +16,46 @@ * For more information : contact@centreon.com */ -#include "com/centreon/exceptions/msg_fmt.hh" - #include "check.hh" using namespace com::centreon::agent; +/** + * @brief update check interval of a check + * + * @param cmd_name name of command (entered by user in centreon UI) + * @param last_check_interval + */ +void checks_statistics::add_interval_stat(const std::string& cmd_name, + const duration& last_check_interval) { + auto it = _stats.find(cmd_name); + if (it == _stats.end()) { + _stats.insert({cmd_name, last_check_interval, {}}); + } else { + _stats.get<0>().modify(it, [last_check_interval](check_stat& it) { + it.last_check_interval = last_check_interval; + }); + } +} + +/** + * @brief update check duration of a check + * + * @param cmd_name name of command (entered by user in centreon UI) + * @param last_check_duration + */ +void checks_statistics::add_duration_stat(const std::string& cmd_name, + const duration& last_check_duration) { + auto it = _stats.find(cmd_name); + if (it == _stats.end()) { + _stats.insert({cmd_name, {}, last_check_duration}); + } else { + _stats.get<0>().modify(it, [last_check_duration](check_stat& it) { + it.last_check_duration = last_check_duration; + }); + } +} + const std::array check::status_label = { "OK: ", "WARNING: ", "CRITICAL: ", "UNKNOWN: "}; @@ -47,16 +81,18 @@ check::check(const std::shared_ptr& io_context, const std::string& command_name, const std::string& cmd_line, const engine_to_agent_request_ptr& cnf, - completion_handler&& handler) + completion_handler&& handler, + const checks_statistics::pointer& stat) : _start_expected(first_start_expected, check_interval), _service(serv), _command_name(command_name), _command_line(cmd_line), _conf(cnf), - _io_context(io_context), - _logger(logger), _time_out_timer(*io_context), - _completion_handler(handler) {} + _completion_handler(handler), + _stat(stat), + _io_context(io_context), + _logger(logger) {} /** * @brief start timeout timer and init some flags used by timeout and completion @@ -87,6 +123,15 @@ bool check::_start_check(const duration& timeout) { _running_check = true; _start_timeout_timer(timeout); SPDLOG_LOGGER_TRACE(_logger, "start check for service {}", _service); + + time_point now = std::chrono::system_clock::now(); + + if (_last_start.time_since_epoch().count() != 0) { + _stat->add_interval_stat(_command_name, now - _last_start); + } + + _last_start = now; + return true; } @@ -148,6 +193,8 @@ void check::on_completion( _time_out_timer.cancel(); _running_check = false; ++_running_check_index; + _stat->add_duration_stat(_command_name, + std::chrono::system_clock::now() - _last_start); _completion_handler(shared_from_this(), status, perfdata, outputs); } } diff --git a/agent/src/check_exec.cc b/agent/src/check_exec.cc index 281a4eaf9e..27a1250f9a 100644 --- a/agent/src/check_exec.cc +++ b/agent/src/check_exec.cc @@ -122,7 +122,8 @@ check_exec::check_exec(const std::shared_ptr& io_context, const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : check(io_context, logger, first_start_expected, @@ -131,7 +132,8 @@ check_exec::check_exec(const std::shared_ptr& io_context, cmd_name, cmd_line, cnf, - std::move(handler)) {} + std::move(handler), + stat) {} /** * @brief create and initialize a check_exec object (don't use constructor) @@ -158,10 +160,11 @@ std::shared_ptr check_exec::load( const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) { + check::completion_handler&& handler, + const checks_statistics::pointer& stat) { std::shared_ptr ret = std::make_shared( io_context, logger, first_start_expected, check_interval, serv, cmd_name, - cmd_line, cnf, std::move(handler)); + cmd_line, cnf, std::move(handler), stat); ret->_init(); return ret; } diff --git a/agent/src/check_health.cc b/agent/src/check_health.cc new file mode 100644 index 0000000000..2e9668acb2 --- /dev/null +++ b/agent/src/check_health.cc @@ -0,0 +1,299 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For more information : contact@centreon.com + */ + +#include "check_health.hh" +#include +#include "com/centreon/common/rapidjson_helper.hh" +#include "config.hh" +#include "version.hh" + +using namespace com::centreon::agent; + +/** + * @brief Construct a new check_health object + * + * @param io_context + * @param logger + * @param first_start_expected + * @param check_interval + * @param serv + * @param cmd_name + * @param cmd_line + * @param args + * @param cnf + * @param handler + */ +check_health::check_health(const std::shared_ptr& io_context, + const std::shared_ptr& logger, + time_point first_start_expected, + duration check_interval, + const std::string& serv, + const std::string& cmd_name, + const std::string& cmd_line, + const rapidjson::Value& args, + const engine_to_agent_request_ptr& cnf, + check::completion_handler&& handler, + const checks_statistics::pointer& stat) + : check(io_context, + logger, + first_start_expected, + check_interval, + serv, + cmd_name, + cmd_line, + cnf, + std::move(handler), + stat), + _measure_timer(*io_context) { + com::centreon::common::rapidjson_helper arg(args); + try { + if (args.IsObject()) { + _warning_check_interval = arg.get_unsigned("warning-interval", 0); + _critical_check_interval = arg.get_unsigned("critical-interval", 0); + _warning_check_duration = arg.get_unsigned("warning-runtime", 0); + _critical_check_duration = arg.get_unsigned("critical-runtime", 0); + } + } catch (const std::exception& e) { + SPDLOG_LOGGER_ERROR(_logger, "check_health, fail to parse arguments: {}", + e.what()); + throw; + } + + if (config::instance().use_reverse_connection()) { + _info_output = "Version: " CENTREON_AGENT_VERSION + " - Connection mode: Poller initiated - Current " + "configuration: {} checks - Average runtime: {}s"; + } else { + _info_output = "Version: " CENTREON_AGENT_VERSION + " - Connection mode: Agent initiated - Current " + "configuration: {} checks - Average runtime: {}s"; + } +} + +/** + * @brief start a timer to do the job + * + * @param timeout unused + */ +void check_health::start_check([[maybe_unused]] const duration& timeout) { + if (!_start_check(timeout)) { + return; + } + + // we wait a little in order to have statistics check_interval/2 + _measure_timer.expires_from_now(get_raw_start_expected().get_step() / 2); + _measure_timer.async_wait( + [me = shared_from_this(), start_check_index = _get_running_check_index()]( + const boost::system::error_code& err) mutable { + std::static_pointer_cast(me)->_measure_timer_handler( + err, start_check_index); + }); +} + +/** + * @brief timer handler that do the job + * + * @param err set if canceled + * @param start_check_index used by on_completion + */ +void check_health::_measure_timer_handler(const boost::system::error_code& err, + unsigned start_check_index) { + if (err) { + return; + } + std::string output; + std::list perf; + e_status status = compute(&output, &perf); + + on_completion(start_check_index, status, perf, {output}); +} + +/** + * @brief calculate status, output and perfdata from statistics + * + * @param ms_uptime + * @param output + * @param perfs + * @return e_status + */ +e_status check_health::compute(std::string* output, + std::list* perf) { + e_status ret = e_status::ok; + + const checks_statistics& stats = get_stats(); + + if (stats.size() == 0) { + *output = "UNKNOWN: No check yet performed"; + return e_status::unknown; + } + + absl::flat_hash_set written_to_output; + + unsigned average_runtime = 0; + for (const auto& stat : stats.get_ordered_by_duration()) { + average_runtime += std::chrono::duration_cast( + stat.last_check_duration) + .count(); + } + + auto append_state_to_output = [&](e_status status, std::string* temp_output, + const auto& iter) { + if (written_to_output.insert(iter->cmd_name).second) { + if (temp_output->empty()) { + *temp_output = status_label[status]; + } else { + temp_output->push_back(','); + temp_output->push_back(' '); + } + if (status > ret) { + ret = status; + } + absl::StrAppend(temp_output, iter->cmd_name, " runtime:", + std::chrono::duration_cast( + iter->last_check_duration) + .count(), + "s interval:", + std::chrono::duration_cast( + iter->last_check_interval) + .count(), + "s"); + } + }; + + std::string critical_output; + if (_critical_check_duration > 0) { + auto critical_duration = std::chrono::seconds(_critical_check_duration); + for (auto iter = stats.get_ordered_by_duration().rbegin(); + iter != stats.get_ordered_by_duration().rend() && + iter->last_check_duration > critical_duration; + ++iter) { + append_state_to_output(e_status::critical, &critical_output, iter); + } + } + + if (_critical_check_interval > 0) { + auto critical_interval = std::chrono::seconds(_critical_check_interval); + for (auto iter = stats.get_ordered_by_interval().rbegin(); + iter != stats.get_ordered_by_interval().rend() && + iter->last_check_interval > critical_interval; + ++iter) { + append_state_to_output(e_status::critical, &critical_output, iter); + } + } + + std::string warning_output; + if (_warning_check_duration) { + auto warning_duration = std::chrono::seconds(_warning_check_duration); + for (auto iter = stats.get_ordered_by_duration().rbegin(); + iter != stats.get_ordered_by_duration().rend() && + iter->last_check_duration > warning_duration; + ++iter) { + append_state_to_output(e_status::warning, &warning_output, iter); + } + } + + if (_warning_check_interval) { + auto warning_interval = std::chrono::seconds(_warning_check_interval); + for (auto iter = stats.get_ordered_by_interval().rbegin(); + iter != stats.get_ordered_by_interval().rend() && + iter->last_check_interval > warning_interval; + ++iter) { + append_state_to_output(e_status::warning, &warning_output, iter); + } + } + + unsigned max_check_interval = + std::chrono::duration_cast( + stats.get_ordered_by_interval().rbegin()->last_check_interval) + .count(); + unsigned max_check_duration = + std::chrono::duration_cast( + stats.get_ordered_by_duration().rbegin()->last_check_duration) + .count(); + + auto& interval_perf = perf->emplace_back(); + interval_perf.name("interval"); + interval_perf.unit("s"); + interval_perf.value(max_check_interval); + if (_warning_check_interval > 0) { + interval_perf.warning_low(0); + interval_perf.warning(_warning_check_interval); + } + if (_critical_check_interval > 0) { + interval_perf.critical_low(0); + interval_perf.critical(_critical_check_interval); + } + + auto& duration_perf = perf->emplace_back(); + duration_perf.name("runtime"); + duration_perf.unit("s"); + duration_perf.value(max_check_duration); + if (_warning_check_duration > 0) { + duration_perf.warning_low(0); + duration_perf.warning(_warning_check_duration); + } + if (_critical_check_duration > 0) { + duration_perf.critical_low(0); + duration_perf.critical(_critical_check_duration); + } + + if (ret != e_status::ok) { + if (!critical_output.empty()) { + output->append(critical_output); + if (!warning_output.empty()) { + *output += " - "; + output->append(warning_output); + } + } else if (!warning_output.empty()) { + output->append(warning_output); + } + *output += " - "; + } else { + *output = "OK: "; + } + fmt::format_to(std::back_inserter(*output), _info_output, get_stats().size(), + average_runtime / get_stats().size()); + + return ret; +} + +void check_health::help(std::ostream& help_stream) { + help_stream << R"( +- health params: + - warning-interval (s): warning if a check interval is greater than this value + - critical-interval (s): critical if a check interval is greater than this value + - warning-runtime (s): warning if a check duration is greater than this value + - critical-runtime (s): critical if a check duration is greater than this value + An example of configuration: + { + "check": "health", + "args": { + "warning-runtime": 30, + "critical-runtime": 50, + "warning-interval": 60, + "critical-interval": "90" + } + } + Examples of output: + CRITICAL: command2 runtime:25s interval:15s - WARNING: command1 runtime:20s interval:10s - Version: 24.11.0 - Connection mode: Poller initiated - Current configuration: 2 checks - Average runtime: 22s + Metrics: + runtime + interval + +)"; +} diff --git a/agent/src/config.cc b/agent/src/config.cc index 47098e0628..a6cb759e61 100644 --- a/agent/src/config.cc +++ b/agent/src/config.cc @@ -19,7 +19,6 @@ #include #include "com/centreon/common/rapidjson_helper.hh" -#include "com/centreon/exceptions/msg_fmt.hh" #include "config.hh" using namespace com::centreon::agent; @@ -103,6 +102,8 @@ const std::string_view config::config_schema(R"( )"); +std::unique_ptr config::_global_conf; + config::config(const std::string& path) { static common::json_validator validator(config_schema); rapidjson::Document file_content_d; diff --git a/agent/src/config_win.cc b/agent/src/config_win.cc index 64d822b04d..8abe509d39 100644 --- a/agent/src/config_win.cc +++ b/agent/src/config_win.cc @@ -18,11 +18,12 @@ #include -#include "com/centreon/exceptions/msg_fmt.hh" #include "config.hh" using namespace com::centreon::agent; +std::unique_ptr config::_global_conf; + /** * @brief Construct a new config::config object * diff --git a/agent/src/drive_size.cc b/agent/src/drive_size.cc index 6f27507d6e..f2e58b14a1 100644 --- a/agent/src/drive_size.cc +++ b/agent/src/drive_size.cc @@ -17,10 +17,7 @@ */ #include "drive_size.hh" -#include "check.hh" -#include "com/centreon/common/perfdata.hh" #include "com/centreon/common/rapidjson_helper.hh" -#include "com/centreon/exceptions/msg_fmt.hh" using namespace com::centreon::agent; @@ -336,7 +333,8 @@ check_drive_size::check_drive_size( const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : check(io_context, logger, first_start_expected, @@ -345,7 +343,8 @@ check_drive_size::check_drive_size( cmd_name, cmd_line, cnf, - std::move(handler)), + std::move(handler), + stat), _filter(std::make_shared(args)), _prct_threshold(false), _free_threshold(false), @@ -590,7 +589,7 @@ void check_drive_size::thread_kill() { void check_drive_size::help(std::ostream& help_stream) { help_stream << R"( -- storage params:" +- storage params: unit (default %): unit of threshold. If different from % threshold are in bytes free (default used): true: threshold is applied on free space and service become warning if free sapce is lower than threshold false: threshold is applied on used space and service become warning if used space is higher than threshold diff --git a/agent/src/main.cc b/agent/src/main.cc index 8617d55f9f..aac284607e 100644 --- a/agent/src/main.cc +++ b/agent/src/main.cc @@ -21,6 +21,8 @@ #include #include "check_cpu.hh" +#include "check_health.hh" + #include "config.hh" #include "drive_size.hh" #include "streaming_client.hh" @@ -107,12 +109,14 @@ int main(int argc, char* argv[]) { argv[0], config::config_schema); std::cout << std::endl << "Native checks options:" << std::endl; check_cpu::help(std::cout); + check_health::help(std::cout); return 1; } - std::unique_ptr conf; try { - conf = std::make_unique(argv[1]); + // mandatory to convert arg to a string to ensure of the choice of load + // method by compiler + config::load(std::string(argv[1])); } catch (const std::exception& e) { SPDLOG_ERROR("fail to parse config file {}: {}", argv[1], e.what()); return 1; @@ -126,20 +130,21 @@ int main(int argc, char* argv[]) { const std::string logger_name = "centreon-monitoring-agent"; - if (conf->get_log_type() == config::to_file) { + const config& conf = config::instance(); + if (conf.get_log_type() == config::to_file) { try { - if (!conf->get_log_file().empty()) { - if (conf->get_log_files_max_size() > 0 && - conf->get_log_files_max_number() > 0) { + if (!conf.get_log_file().empty()) { + if (conf.get_log_files_max_size() > 0 && + conf.get_log_files_max_number() > 0) { g_logger = spdlog::rotating_logger_mt( - logger_name, conf->get_log_file(), - conf->get_log_files_max_size() * 0x100000, - conf->get_log_files_max_number()); + logger_name, conf.get_log_file(), + conf.get_log_files_max_size() * 0x100000, + conf.get_log_files_max_number()); } else { SPDLOG_INFO( "no log-max-file-size option or no log-max-files option provided " "=> logs will not be rotated by centagent"); - g_logger = spdlog::basic_logger_mt(logger_name, conf->get_log_file()); + g_logger = spdlog::basic_logger_mt(logger_name, conf.get_log_file()); } } else { SPDLOG_ERROR( @@ -147,14 +152,14 @@ int main(int argc, char* argv[]) { g_logger = spdlog::stdout_color_mt(logger_name); } } catch (const std::exception& e) { - SPDLOG_CRITICAL("Can't log to {}: {}", conf->get_log_file(), e.what()); + SPDLOG_CRITICAL("Can't log to {}: {}", conf.get_log_file(), e.what()); return 2; } } else { g_logger = spdlog::stdout_color_mt(logger_name); } - g_logger->set_level(conf->get_log_level()); + g_logger->set_level(conf.get_log_level()); g_logger->flush_on(spdlog::level::warn); @@ -174,23 +179,23 @@ int main(int argc, char* argv[]) { _signals.async_wait(signal_handler); grpc_conf = std::make_shared( - conf->get_endpoint(), conf->use_encryption(), - read_file(conf->get_public_cert_file()), - read_file(conf->get_private_key_file()), - read_file(conf->get_ca_certificate_file()), conf->get_ca_name(), true, - 30, conf->get_second_max_reconnect_backoff()); + conf.get_endpoint(), conf.use_encryption(), + read_file(conf.get_public_cert_file()), + read_file(conf.get_private_key_file()), + read_file(conf.get_ca_certificate_file()), conf.get_ca_name(), true, 30, + conf.get_second_max_reconnect_backoff()); } catch (const std::exception& e) { SPDLOG_CRITICAL("fail to parse input params: {}", e.what()); return -1; } - if (conf->use_reverse_connection()) { + if (conf.use_reverse_connection()) { _streaming_server = streaming_server::load(g_io_context, g_logger, - grpc_conf, conf->get_host()); + grpc_conf, conf.get_host()); } else { _streaming_client = streaming_client::load(g_io_context, g_logger, - grpc_conf, conf->get_host()); + grpc_conf, conf.get_host()); } try { diff --git a/agent/src/main_win.cc b/agent/src/main_win.cc index 099edcdeee..09128e057b 100644 --- a/agent/src/main_win.cc +++ b/agent/src/main_win.cc @@ -18,6 +18,7 @@ #include #include "check_cpu.hh" +#include "check_health.hh" #include "check_memory.hh" #include "check_service.hh" #include "check_uptime.hh" @@ -125,6 +126,7 @@ void show_help() { check_uptime::help(std::cout); check_drive_size::help(std::cout); check_service::help(std::cout); + check_health::help(std::cout); } /** @@ -135,11 +137,10 @@ void show_help() { * @return int exit status returned to command line (0 success) */ int _main(bool service_start) { - const char* registry_path = "SOFTWARE\\Centreon\\" SERVICE_NAME; + std::string registry_path = "SOFTWARE\\Centreon\\" SERVICE_NAME; - std::unique_ptr conf; try { - conf = std::make_unique(registry_path); + config::load(registry_path); } catch (const std::exception& e) { SPDLOG_ERROR("fail to read conf from registry {}: {}", registry_path, e.what()); @@ -163,37 +164,39 @@ int _main(bool service_start) { g_logger = std::make_shared("", sink); }; + const config& conf = config::instance(); + try { - if (conf->get_log_type() == config::to_file) { - if (!conf->get_log_file().empty()) { - if (conf->get_log_files_max_size() > 0 && - conf->get_log_files_max_number() > 0) { + if (conf.get_log_type() == config::to_file) { + if (!conf.get_log_file().empty()) { + if (conf.get_log_files_max_size() > 0 && + conf.get_log_files_max_number() > 0) { g_logger = spdlog::rotating_logger_mt( - logger_name, conf->get_log_file(), - conf->get_log_files_max_size() * 0x100000, - conf->get_log_files_max_number()); + logger_name, conf.get_log_file(), + conf.get_log_files_max_size() * 0x100000, + conf.get_log_files_max_number()); } else { SPDLOG_INFO( "no log-max-file-size option or no log-max-files option provided " "=> logs will not be rotated by centagent"); - g_logger = spdlog::basic_logger_mt(logger_name, conf->get_log_file()); + g_logger = spdlog::basic_logger_mt(logger_name, conf.get_log_file()); } } else { SPDLOG_ERROR( "log-type=file needs the option log-file => log to event log"); create_event_logger(); } - } else if (conf->get_log_type() == config::to_stdout) { + } else if (conf.get_log_type() == config::to_stdout) { g_logger = spdlog::stdout_color_mt(logger_name); } else { create_event_logger(); } } catch (const std::exception& e) { - SPDLOG_CRITICAL("Can't log to {}: {}", conf->get_log_file(), e.what()); + SPDLOG_CRITICAL("Can't log to {}: {}", conf.get_log_file(), e.what()); return 2; } - g_logger->set_level(conf->get_log_level()); + g_logger->set_level(conf.get_log_level()); g_logger->flush_on(spdlog::level::warn); @@ -206,23 +209,23 @@ int _main(bool service_start) { _signals.async_wait(signal_handler); grpc_conf = std::make_shared( - conf->get_endpoint(), conf->use_encryption(), - read_file(conf->get_public_cert_file()), - read_file(conf->get_private_key_file()), - read_file(conf->get_ca_certificate_file()), conf->get_ca_name(), true, - 30, conf->get_second_max_reconnect_backoff()); + conf.get_endpoint(), conf.use_encryption(), + read_file(conf.get_public_cert_file()), + read_file(conf.get_private_key_file()), + read_file(conf.get_ca_certificate_file()), conf.get_ca_name(), true, 30, + conf.get_second_max_reconnect_backoff()); } catch (const std::exception& e) { SPDLOG_CRITICAL("fail to parse input params: {}", e.what()); return -1; } - if (conf->use_reverse_connection()) { + if (conf.use_reverse_connection()) { _streaming_server = streaming_server::load(g_io_context, g_logger, - grpc_conf, conf->get_host()); + grpc_conf, conf.get_host()); } else { _streaming_client = streaming_client::load(g_io_context, g_logger, - grpc_conf, conf->get_host()); + grpc_conf, conf.get_host()); } try { diff --git a/agent/src/native_check_base.cc b/agent/src/native_check_base.cc index 6f005c02f5..593f89b406 100644 --- a/agent/src/native_check_base.cc +++ b/agent/src/native_check_base.cc @@ -94,7 +94,8 @@ native_check_base::native_check_base( const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : check(io_context, logger, first_start_expected, @@ -103,7 +104,8 @@ native_check_base::native_check_base( cmd_name, cmd_line, cnf, - std::move(handler)) {} + std::move(handler), + stat) {} /** * @brief start a measure diff --git a/agent/src/native_check_cpu_base.cc b/agent/src/native_check_cpu_base.cc index d646c0fda2..01f04e5002 100644 --- a/agent/src/native_check_cpu_base.cc +++ b/agent/src/native_check_cpu_base.cc @@ -228,7 +228,8 @@ native_check_cpu::native_check_cpu( const std::string& cmd_line, const rapidjson::Value& args, const engine_to_agent_request_ptr& cnf, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : check(io_context, logger, first_start_expected, @@ -237,7 +238,8 @@ native_check_cpu::native_check_cpu( cmd_name, cmd_line, cnf, - std::move(handler)), + std::move(handler), + stat), _nb_core(std::thread::hardware_concurrency()), _cpu_detailed(false), diff --git a/agent/src/scheduler.cc b/agent/src/scheduler.cc index 8718a14818..08741b12a2 100644 --- a/agent/src/scheduler.cc +++ b/agent/src/scheduler.cc @@ -17,7 +17,10 @@ */ #include "scheduler.hh" +#include +#include "check.hh" #include "check_cpu.hh" +#include "check_health.hh" #ifdef _WIN32 #include "check_memory.hh" #include "check_service.hh" @@ -174,6 +177,9 @@ void scheduler::update(const engine_to_agent_request_ptr& conf) { conf->config().check_interval()); if (nb_check > 0) { + // raz stats in order to not keep statistics of deleted checks + checks_statistics::pointer stat = std::make_shared(); + duration time_between_check = std::chrono::microseconds(conf->config().check_interval() * 1000000) / nb_check; @@ -201,7 +207,8 @@ void scheduler::update(const engine_to_agent_request_ptr& conf) { const std::list& perfdata, const std::list& outputs) { me->_check_handler(check, status, perfdata, outputs); - }); + }, + stat); last_inserted_iter = _waiting_check_queue.emplace_hint( last_inserted_iter, check_to_schedule); next += time_between_check; @@ -303,10 +310,10 @@ void scheduler::stop() { * @param outputs */ void scheduler::_store_result_in_metrics( - const check::pointer& check, - unsigned status, - const std::list& perfdata, - const std::list& outputs) { + [[maybe_unused]] const check::pointer& check, + [[maybe_unused]] unsigned status, + [[maybe_unused]] const std::list& perfdata, + [[maybe_unused]] const std::list& outputs) { // auto scope_metrics = // get_scope_metrics(check->get_host(), check->get_service()); // unsigned now = std::chrono::duration_cast( @@ -544,7 +551,8 @@ std::shared_ptr scheduler::default_check_builder( const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& conf, - check::completion_handler&& handler) { + check::completion_handler&& handler, + const checks_statistics::pointer& stat) { using namespace std::literals; // test native checks where cmd_lin is a json try { @@ -562,24 +570,28 @@ std::shared_ptr scheduler::default_check_builder( if (check_type == "cpu_percentage"sv) { return std::make_shared( io_context, logger, first_start_expected, check_interval, service, - cmd_name, cmd_line, *args, conf, std::move(handler)); + cmd_name, cmd_line, *args, conf, std::move(handler), stat); + } else if (check_type == "health"sv) { + return std::make_shared( + io_context, logger, first_start_expected, check_interval, service, + cmd_name, cmd_line, *args, conf, std::move(handler), stat); #ifdef _WIN32 } else if (check_type == "uptime"sv) { return std::make_shared( io_context, logger, first_start_expected, check_interval, service, - cmd_name, cmd_line, *args, conf, std::move(handler)); + cmd_name, cmd_line, *args, conf, std::move(handler), stat); } else if (check_type == "storage"sv) { return std::make_shared( io_context, logger, first_start_expected, check_interval, service, - cmd_name, cmd_line, *args, conf, std::move(handler)); + cmd_name, cmd_line, *args, conf, std::move(handler), stat); } else if (check_type == "memory"sv) { return std::make_shared( io_context, logger, first_start_expected, check_interval, service, - cmd_name, cmd_line, *args, conf, std::move(handler)); + cmd_name, cmd_line, *args, conf, std::move(handler), stat); } else if (check_type == "service"sv) { return std::make_shared( io_context, logger, first_start_expected, check_interval, service, - cmd_name, cmd_line, *args, conf, std::move(handler)); + cmd_name, cmd_line, *args, conf, std::move(handler), stat); #endif } else { throw exceptions::msg_fmt("command {}, unknown native check:{}", cmd_name, @@ -588,6 +600,6 @@ std::shared_ptr scheduler::default_check_builder( } catch (const std::exception&) { return check_exec::load(io_context, logger, first_start_expected, check_interval, service, cmd_name, cmd_line, conf, - std::move(handler)); + std::move(handler), stat); } } diff --git a/agent/src/streaming_client.cc b/agent/src/streaming_client.cc index ab38cc6771..d93b7d93a6 100644 --- a/agent/src/streaming_client.cc +++ b/agent/src/streaming_client.cc @@ -191,7 +191,7 @@ void streaming_client::_send(const std::shared_ptr& request) { * @param request */ void streaming_client::on_incomming_request( - const std::shared_ptr& caller, + const std::shared_ptr& caller [[maybe_unused]], const std::shared_ptr& request) { // incoming request is used in main thread _io_context->post([request, sched = _sched]() { sched->update(request); }); diff --git a/agent/test/CMakeLists.txt b/agent/test/CMakeLists.txt index 6d5152ee6e..6150262e44 100644 --- a/agent/test/CMakeLists.txt +++ b/agent/test/CMakeLists.txt @@ -20,6 +20,7 @@ set( SRC_COMMON check_test.cc check_exec_test.cc drive_size_test.cc + check_health_test.cc scheduler_test.cc test_main.cc ) diff --git a/agent/test/check_exec_test.cc b/agent/test/check_exec_test.cc index 60f49bc77e..23966c702b 100644 --- a/agent/test/check_exec_test.cc +++ b/agent/test/check_exec_test.cc @@ -17,6 +17,8 @@ */ #include +#include +#include "check.hh" #include "check_exec.hh" @@ -47,9 +49,11 @@ TEST(check_exec_test, echo) { std::shared_ptr check = check_exec::load( g_io_context, spdlog::default_logger(), {}, {}, serv, cmd_name, command_line, engine_to_agent_request_ptr(), - [&](const std::shared_ptr& caller, + [&]([[maybe_unused]] const std::shared_ptr& + caller, int statuss, - const std::list& perfdata, + [[maybe_unused]] const std::list& + perfdata, const std::list& output) { { std::lock_guard l(mut); @@ -57,7 +61,8 @@ TEST(check_exec_test, echo) { outputs = output; } cond.notify_one(); - }); + }, + std::make_shared()); check->start_check(std::chrono::seconds(1)); std::unique_lock l(mut); @@ -75,14 +80,17 @@ TEST(check_exec_test, timeout) { std::shared_ptr check = check_exec::load( g_io_context, spdlog::default_logger(), {}, {}, serv, cmd_name, command_line, engine_to_agent_request_ptr(), - [&](const std::shared_ptr& caller, + [&]([[maybe_unused]] const std::shared_ptr& + caller, int statuss, - const std::list& perfdata, + [[maybe_unused]] const std::list& + perfdata, const std::list& output) { status = statuss; outputs = output; cond.notify_one(); - }); + }, + std::make_shared()); check->start_check(std::chrono::seconds(1)); int pid = check->get_pid(); @@ -119,9 +127,11 @@ TEST(check_exec_test, bad_command) { std::shared_ptr check = check_exec::load( g_io_context, spdlog::default_logger(), {}, {}, serv, cmd_name, command_line, engine_to_agent_request_ptr(), - [&](const std::shared_ptr& caller, + [&]([[maybe_unused]] const std::shared_ptr& + caller, int statuss, - const std::list& perfdata, + [[maybe_unused]] const std::list& + perfdata, const std::list& output) { { std::lock_guard l(mut); @@ -131,7 +141,8 @@ TEST(check_exec_test, bad_command) { SPDLOG_INFO("end of {}", command_line); std::this_thread::sleep_for(std::chrono::milliseconds(50)); cond.notify_one(); - }); + }, + std::make_shared()); check->start_check(std::chrono::seconds(1)); std::unique_lock l(mut); @@ -156,14 +167,16 @@ TEST(check_exec_test, recurse_not_lock) { g_io_context, spdlog::default_logger(), {}, {}, serv, cmd_name, command_line, engine_to_agent_request_ptr(), [&](const std::shared_ptr& caller, int, - const std::list& perfdata, - const std::list& output) { + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& output) { if (!cpt) { ++cpt; caller->start_check(std::chrono::seconds(1)); } else cond.notify_one(); - }); + }, + std::make_shared()); check->start_check(std::chrono::seconds(1)); std::mutex mut; diff --git a/agent/test/check_health_test.cc b/agent/test/check_health_test.cc new file mode 100644 index 0000000000..339241d8ab --- /dev/null +++ b/agent/test/check_health_test.cc @@ -0,0 +1,339 @@ +/** + * Copyright 2024 Centreon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For more information : contact@centreon.com + */ + +#include +#include +#include "check.hh" +#include "com/centreon/common/rapidjson_helper.hh" + +#include "check_health.hh" +#include "config.hh" +#include "version.hh" + +extern std::shared_ptr g_io_context; + +using namespace com::centreon::agent; +using namespace std::string_literals; +using namespace com::centreon::common::literals; +using namespace std::chrono_literals; + +TEST(check_health_test, no_threshold_no_reverse) { + config::load(false); + + rapidjson::Document check_args = + R"({ "warning-interval" : "", "critical-interval" : ""})"_json; + + auto stats = std::make_shared(); + + stats->add_interval_stat("command1", 10s); + stats->add_duration_stat("command1", 20s); + stats->add_interval_stat("command2", 15s); + stats->add_duration_stat("command2", 25s); + + check_health checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv"s, "cmd_name"s, + "cmd_line"s, check_args, nullptr, + []([[maybe_unused]] const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}, + stats); + + std::string output; + std::list perfs; + e_status ret = checker.compute(&output, &perfs); + EXPECT_EQ(ret, e_status::ok); + EXPECT_EQ(output, "OK: Version: " CENTREON_AGENT_VERSION + " - Connection mode: Agent initiated - Current " + "configuration: 2 checks - Average runtime: 22s"); + EXPECT_EQ(perfs.size(), 2); + for (const auto& perf : perfs) { + EXPECT_EQ(perf.unit(), "s"); + EXPECT_TRUE(std::isnan(perf.warning_low())); + EXPECT_TRUE(std::isnan(perf.warning())); + EXPECT_TRUE(std::isnan(perf.critical_low())); + EXPECT_TRUE(std::isnan(perf.critical())); + if (perf.name() == "runtime") { + EXPECT_EQ(perf.value(), 25); + } else if (perf.name() == "interval") { + EXPECT_EQ(perf.value(), 15); + } else { + FAIL() << "Unexpected perfdata name: " << perf.name(); + } + } +} + +TEST(check_health_test, no_threshold_reverse) { + config::load(true); + + rapidjson::Document check_args = + R"({ "warning-interval" : "", "critical-interval" : ""})"_json; + + auto stats = std::make_shared(); + + stats->add_interval_stat("command1", 10s); + stats->add_duration_stat("command1", 20s); + stats->add_interval_stat("command2", 15s); + stats->add_duration_stat("command2", 25s); + + check_health checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv"s, "cmd_name"s, + "cmd_line"s, check_args, nullptr, + []([[maybe_unused]] const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}, + stats); + + std::string output; + std::list perfs; + e_status ret = checker.compute(&output, &perfs); + EXPECT_EQ(ret, e_status::ok); + EXPECT_EQ(output, "OK: Version: " CENTREON_AGENT_VERSION + " - Connection mode: Poller initiated - Current " + "configuration: 2 checks - Average runtime: 22s"); + EXPECT_EQ(perfs.size(), 2); + for (const auto& perf : perfs) { + EXPECT_EQ(perf.unit(), "s"); + EXPECT_TRUE(std::isnan(perf.warning_low())); + EXPECT_TRUE(std::isnan(perf.warning())); + EXPECT_TRUE(std::isnan(perf.critical_low())); + EXPECT_TRUE(std::isnan(perf.critical())); + if (perf.name() == "runtime") { + EXPECT_EQ(perf.value(), 25); + } else if (perf.name() == "interval") { + EXPECT_EQ(perf.value(), 15); + } else { + FAIL() << "Unexpected perfdata name: " << perf.name(); + } + } +} + +TEST(check_health_test, threshold_1) { + config::load(true); + + rapidjson::Document check_args = + R"({ "warning-interval" : "9", "critical-interval" : "14"})"_json; + + auto stats = std::make_shared(); + + stats->add_interval_stat("command1", 10s); + stats->add_duration_stat("command1", 20s); + stats->add_interval_stat("command2", 15s); + stats->add_duration_stat("command2", 25s); + + check_health checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv"s, "cmd_name"s, + "cmd_line"s, check_args, nullptr, + []([[maybe_unused]] const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}, + stats); + + std::string output; + std::list perfs; + e_status ret = checker.compute(&output, &perfs); + EXPECT_EQ(ret, e_status::critical); + EXPECT_EQ(output, + "CRITICAL: command2 runtime:25s interval:15s - WARNING: command1 " + "runtime:20s interval:10s - Version: " CENTREON_AGENT_VERSION + " - Connection mode: Poller initiated - Current configuration: 2 " + "checks - Average runtime: 22s"); + EXPECT_EQ(perfs.size(), 2); + for (const auto& perf : perfs) { + EXPECT_EQ(perf.unit(), "s"); + if (perf.name() == "runtime") { + EXPECT_TRUE(std::isnan(perf.warning_low())); + EXPECT_TRUE(std::isnan(perf.warning())); + EXPECT_TRUE(std::isnan(perf.critical_low())); + EXPECT_TRUE(std::isnan(perf.critical())); + EXPECT_EQ(perf.value(), 25); + } else if (perf.name() == "interval") { + EXPECT_EQ(perf.value(), 15); + EXPECT_EQ(perf.warning_low(), 0); + EXPECT_EQ(perf.warning(), 9); + EXPECT_EQ(perf.critical_low(), 0); + EXPECT_EQ(perf.critical(), 14); + } else { + FAIL() << "Unexpected perfdata name: " << perf.name(); + } + } +} + +TEST(check_health_test, threshold_2) { + config::load(true); + + rapidjson::Document check_args = + R"({ "warning-interval" : "9", "critical-interval" : "14", "warning-runtime": 19, "critical-runtime":24})"_json; + + auto stats = std::make_shared(); + + stats->add_interval_stat("command1", 10s); + stats->add_duration_stat("command1", 20s); + stats->add_interval_stat("command2", 15s); + stats->add_duration_stat("command2", 25s); + + check_health checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv"s, "cmd_name"s, + "cmd_line"s, check_args, nullptr, + []([[maybe_unused]] const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}, + stats); + + std::string output; + std::list perfs; + e_status ret = checker.compute(&output, &perfs); + EXPECT_EQ(ret, e_status::critical); + EXPECT_EQ(output, + "CRITICAL: command2 runtime:25s interval:15s - WARNING: command1 " + "runtime:20s interval:10s - Version: " CENTREON_AGENT_VERSION + " - Connection mode: Poller initiated - Current configuration: 2 " + "checks - Average runtime: 22s"); + EXPECT_EQ(perfs.size(), 2); + for (const auto& perf : perfs) { + EXPECT_EQ(perf.unit(), "s"); + if (perf.name() == "runtime") { + EXPECT_EQ(perf.value(), 25); + EXPECT_EQ(perf.warning_low(), 0); + EXPECT_EQ(perf.warning(), 19); + EXPECT_EQ(perf.critical_low(), 0); + EXPECT_EQ(perf.critical(), 24); + } else if (perf.name() == "interval") { + EXPECT_EQ(perf.value(), 15); + EXPECT_EQ(perf.warning_low(), 0); + EXPECT_EQ(perf.warning(), 9); + EXPECT_EQ(perf.critical_low(), 0); + EXPECT_EQ(perf.critical(), 14); + } else { + FAIL() << "Unexpected perfdata name: " << perf.name(); + } + } +} + +TEST(check_health_test, threshold_3) { + config::load(true); + + rapidjson::Document check_args = + R"({ "warning-interval" : "", "critical-interval" : "14", "warning-runtime": 19})"_json; + + auto stats = std::make_shared(); + + stats->add_interval_stat("command1", 10s); + stats->add_duration_stat("command1", 20s); + stats->add_interval_stat("command2", 15s); + stats->add_duration_stat("command2", 25s); + + check_health checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv"s, "cmd_name"s, + "cmd_line"s, check_args, nullptr, + []([[maybe_unused]] const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}, + stats); + + std::string output; + std::list perfs; + e_status ret = checker.compute(&output, &perfs); + EXPECT_EQ(ret, e_status::critical); + EXPECT_EQ(output, + "CRITICAL: command2 runtime:25s interval:15s - WARNING: command1 " + "runtime:20s interval:10s - Version: " CENTREON_AGENT_VERSION + " - Connection mode: Poller initiated - Current configuration: 2 " + "checks - Average runtime: 22s"); + EXPECT_EQ(perfs.size(), 2); + for (const auto& perf : perfs) { + EXPECT_EQ(perf.unit(), "s"); + if (perf.name() == "runtime") { + EXPECT_EQ(perf.value(), 25); + EXPECT_EQ(perf.warning_low(), 0); + EXPECT_EQ(perf.warning(), 19); + EXPECT_TRUE(std::isnan(perf.critical_low())); + EXPECT_TRUE(std::isnan(perf.critical())); + } else if (perf.name() == "interval") { + EXPECT_EQ(perf.value(), 15); + EXPECT_TRUE(std::isnan(perf.warning_low())); + EXPECT_TRUE(std::isnan(perf.warning())); + EXPECT_EQ(perf.critical_low(), 0); + EXPECT_EQ(perf.critical(), 14); + } else { + FAIL() << "Unexpected perfdata name: " << perf.name(); + } + } +} + +TEST(check_health_test, threshold_4) { + config::load(true); + + rapidjson::Document check_args = + R"({ "warning-interval" : "", "critical-interval" : "16", "warning-runtime": 19})"_json; + + auto stats = std::make_shared(); + + stats->add_interval_stat("command1", 10s); + stats->add_duration_stat("command1", 20s); + stats->add_interval_stat("command2", 15s); + stats->add_duration_stat("command2", 25s); + + check_health checker( + g_io_context, spdlog::default_logger(), {}, {}, "serv"s, "cmd_name"s, + "cmd_line"s, check_args, nullptr, + []([[maybe_unused]] const std::shared_ptr& caller, + [[maybe_unused]] int status, + [[maybe_unused]] const std::list& + perfdata, + [[maybe_unused]] const std::list& outputs) {}, + stats); + + std::string output; + std::list perfs; + e_status ret = checker.compute(&output, &perfs); + EXPECT_EQ(ret, e_status::warning); + EXPECT_EQ(output, + "WARNING: command2 runtime:25s interval:15s, command1 runtime:20s " + "interval:10s - Version: " CENTREON_AGENT_VERSION + " - Connection mode: Poller initiated - Current configuration: 2 " + "checks - Average runtime: 22s"); + EXPECT_EQ(perfs.size(), 2); + for (const auto& perf : perfs) { + EXPECT_EQ(perf.unit(), "s"); + if (perf.name() == "runtime") { + EXPECT_EQ(perf.value(), 25); + EXPECT_EQ(perf.warning_low(), 0); + EXPECT_EQ(perf.warning(), 19); + EXPECT_TRUE(std::isnan(perf.critical_low())); + EXPECT_TRUE(std::isnan(perf.critical())); + } else if (perf.name() == "interval") { + EXPECT_EQ(perf.value(), 15); + EXPECT_TRUE(std::isnan(perf.warning_low())); + EXPECT_TRUE(std::isnan(perf.warning())); + EXPECT_EQ(perf.critical_low(), 0); + EXPECT_EQ(perf.critical(), 16); + } else { + FAIL() << "Unexpected perfdata name: " << perf.name(); + } + } +} diff --git a/agent/test/check_linux_cpu_test.cc b/agent/test/check_linux_cpu_test.cc index 37bb749578..f407a0089c 100644 --- a/agent/test/check_linux_cpu_test.cc +++ b/agent/test/check_linux_cpu_test.cc @@ -157,7 +157,8 @@ TEST(proc_stat_file_test, no_threshold) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); e_status status = checker.compute(first_measure, second_measure, &output, &perfs); @@ -166,8 +167,6 @@ TEST(proc_stat_file_test, no_threshold) { ASSERT_EQ(perfs.size(), 5); - constexpr float nan_to_cmp = NAN; - for (const auto& perf : perfs) { ASSERT_TRUE(std::isnan(perf.critical_low())); ASSERT_TRUE(std::isnan(perf.critical())); @@ -222,8 +221,6 @@ TEST(proc_stat_file_test, no_threshold_detailed) { std::string output; std::list perfs; - static const char* conf_doc = R"({"cpu-detailed":true})"; - using namespace com::centreon::common::literals; rapidjson::Document check_args = R"({"cpu-detailed":"true"})"_json; @@ -234,7 +231,8 @@ TEST(proc_stat_file_test, no_threshold_detailed) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); e_status status = checker.compute(first_measure, second_measure, &output, &perfs); @@ -366,7 +364,8 @@ TEST(proc_stat_file_test, threshold_nodetailed) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); e_status status = checker.compute(first_measure, second_measure, &output, &perfs); @@ -447,7 +446,8 @@ TEST(proc_stat_file_test, threshold_nodetailed2) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); e_status status = checker.compute(first_measure, second_measure, &output, &perfs); @@ -506,7 +506,8 @@ TEST(proc_stat_file_test, threshold_detailed) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); e_status status = checker.compute(first_measure, second_measure, &output, &perfs); @@ -580,7 +581,8 @@ TEST(proc_stat_file_test, threshold_detailed2) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); e_status status = checker.compute(first_measure, second_measure, &output, &perfs); @@ -654,7 +656,8 @@ TEST(proc_stat_file_test, threshold_detailed3) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); e_status status = checker.compute(first_measure, second_measure, &output, &perfs); diff --git a/agent/test/check_test.cc b/agent/test/check_test.cc index 71ec5999f0..ca81bdb37e 100644 --- a/agent/test/check_test.cc +++ b/agent/test/check_test.cc @@ -60,7 +60,8 @@ class dummy_check : public check { command_name, command_line, nullptr, - handler), + handler, + std::make_shared()), _command_duration(command_duration), _command_timer(*g_io_context) {} }; @@ -80,7 +81,8 @@ TEST(check_test, timeout) { serv, cmd_name, cmd_line, std::chrono::milliseconds(500), [&status, &output, &handler_call_cpt, &cond]( const std::shared_ptr&, unsigned statuss, - const std::list& perfdata, + [[maybe_unused]] const std::list& + perfdata, const std::list& outputs) { status = statuss; if (outputs.size() == 1) { @@ -117,7 +119,8 @@ TEST(check_test, no_timeout) { serv, cmd_name, cmd_line, std::chrono::milliseconds(100), [&status, &output, &handler_call_cpt, &cond]( const std::shared_ptr&, unsigned statuss, - const std::list& perfdata, + [[maybe_unused]] const std::list& + perfdata, const std::list& outputs) { status = statuss; if (outputs.size() == 1) { diff --git a/agent/test/check_uptime_test.cc b/agent/test/check_uptime_test.cc index df851bf88d..d08756a387 100644 --- a/agent/test/check_uptime_test.cc +++ b/agent/test/check_uptime_test.cc @@ -39,7 +39,8 @@ TEST(native_check_uptime, ok) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; @@ -69,7 +70,8 @@ TEST(native_check_uptime, ok_m) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; @@ -99,7 +101,8 @@ TEST(native_check_uptime, ok_h) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; @@ -129,7 +132,8 @@ TEST(native_check_uptime, ok_d) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; @@ -159,7 +163,8 @@ TEST(native_check_uptime, ok_w) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; @@ -189,7 +194,8 @@ TEST(native_check_uptime, warning) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; @@ -219,7 +225,8 @@ TEST(native_check_uptime, warning_bis) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; @@ -249,7 +256,8 @@ TEST(native_check_uptime, critical) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; @@ -278,7 +286,8 @@ TEST(native_check_uptime, critical_bis) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); std::string output; com::centreon::common::perfdata perf; diff --git a/agent/test/check_windows_cpu_test.cc b/agent/test/check_windows_cpu_test.cc index 67b41cd7ab..cce9d37167 100644 --- a/agent/test/check_windows_cpu_test.cc +++ b/agent/test/check_windows_cpu_test.cc @@ -83,7 +83,8 @@ TEST(native_check_cpu_windows, output_no_threshold) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); checker.compute(first, second, &output, &perfs); ASSERT_EQ(output, "OK: CPU(s) average usage is 50.00%"); @@ -145,7 +146,8 @@ TEST(native_check_cpu_windows, output_no_threshold_detailed) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); checker.compute(first, second, &output, &perfs); ASSERT_EQ(output, "OK: CPU(s) average usage is 50.00%"); @@ -240,7 +242,8 @@ TEST(native_check_cpu_windows, output_threshold) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); checker.compute(first, second, &output, &perfs); ASSERT_EQ( @@ -313,7 +316,8 @@ TEST(native_check_cpu_windows, output_threshold_detailed) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); checker.compute(first, second, &output, &perfs); ASSERT_EQ( @@ -463,7 +467,8 @@ TEST(native_check_cpu_windows, compare_kernel_dph) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); rapidjson::Document pdh_check_args = R"({"use-nt-query-system-information":"false" })"_json; @@ -475,7 +480,8 @@ TEST(native_check_cpu_windows, compare_kernel_dph) { [[maybe_unused]] int status, [[maybe_unused]] const std::list& perfdata, - [[maybe_unused]] const std::list& outputs) {}); + [[maybe_unused]] const std::list& outputs) {}, + std::make_shared()); auto first_nt = nt_checker.get_cpu_time_snapshot(true); auto first_pdh = pdh_checker.get_cpu_time_snapshot(true); diff --git a/agent/test/check_windows_memory_test.cc b/agent/test/check_windows_memory_test.cc index fe0fecd554..cd5ca009d7 100644 --- a/agent/test/check_windows_memory_test.cc +++ b/agent/test/check_windows_memory_test.cc @@ -20,7 +20,6 @@ #include -#include "com/centreon/common/perfdata.hh" #include "com/centreon/common/rapidjson_helper.hh" #include "check_memory.hh" @@ -51,7 +50,8 @@ class test_check : public check_memory { [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}) {} + const std::list& outputs) {}, + std::make_shared()) {} std::shared_ptr> diff --git a/agent/test/check_windows_service_test.cc b/agent/test/check_windows_service_test.cc index dea07374f7..65facd7293 100644 --- a/agent/test/check_windows_service_test.cc +++ b/agent/test/check_windows_service_test.cc @@ -131,7 +131,8 @@ TEST(check_service, service_no_threshold_all_running) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -199,7 +200,8 @@ TEST(check_service, service_no_threshold_one_by_state) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -264,7 +266,8 @@ TEST(check_service, service_filter_exclude_all_service) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -327,7 +330,8 @@ TEST(check_service, service_filter_allow_some_service) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -396,7 +400,8 @@ TEST(check_service, service_filter_exclude_some_service) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -467,7 +472,8 @@ TEST(check_service, service_filter_allow_some_service_warning_running) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -538,7 +544,8 @@ TEST(check_service, service_filter_allow_some_service_warning_stopped) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -608,7 +615,8 @@ TEST(check_service, service_filter_allow_some_service_critical_state) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -678,7 +686,8 @@ TEST(check_service, service_filter_start_auto_true) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -747,7 +756,8 @@ TEST(check_service, service_filter_start_auto_false) { "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); @@ -818,7 +828,8 @@ TEST(check_service, "cmd_line"s, check_args, nullptr, [](const std::shared_ptr& caller, int status, const std::list& perfdata, - const std::list& outputs) {}); + const std::list& outputs) {}, + std::make_shared()); auto snap = test_check.measure(); diff --git a/agent/test/drive_size_test.cc b/agent/test/drive_size_test.cc index ddb0c9c269..f9757f990c 100644 --- a/agent/test/drive_size_test.cc +++ b/agent/test/drive_size_test.cc @@ -130,7 +130,8 @@ TEST_F(drive_size_test, test_fs_filter1) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); - }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); @@ -197,7 +198,8 @@ TEST_F(drive_size_test, test_fs_filter_percent) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); - }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); @@ -264,7 +266,8 @@ TEST_F(drive_size_test, test_fs_filter2) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); - }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); @@ -319,7 +322,8 @@ TEST_F(drive_size_test, test_fs_filter_percent_2) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); - }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); @@ -383,7 +387,8 @@ TEST_F(drive_size_test, test_fs_filter_percent_3) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); - }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); @@ -441,7 +446,8 @@ TEST_F(drive_size_test, test_fs_filter_percent_4) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); - }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); { @@ -506,7 +512,8 @@ TEST_F(drive_size_test, test_fs_filter_percent_5) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); - }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); @@ -566,7 +573,8 @@ TEST_F(drive_size_test, test_fs_filter_percent_6) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); - }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); @@ -626,7 +634,8 @@ TEST_F(drive_size_test, test_fs_filter_free_percent) { absl::MutexLock lck(&wait_m); perfs = perfdata; output = outputs.front(); - }); + }, + std::make_shared()); checker->start_check(std::chrono::seconds(1)); diff --git a/agent/test/scheduler_test.cc b/agent/test/scheduler_test.cc index 63354e8524..b2bd15bc81 100644 --- a/agent/test/scheduler_test.cc +++ b/agent/test/scheduler_test.cc @@ -45,7 +45,8 @@ class tempo_check : public check { const engine_to_agent_request_ptr& cnf, int command_exit_status, duration completion_delay, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : check(io_context, logger, exp, @@ -54,7 +55,8 @@ class tempo_check : public check { cmd_name, cmd_line, cnf, - std::move(handler)), + std::move(handler), + stat), _completion_timer(*io_context), _command_exit_status(command_exit_status), _completion_delay(completion_delay) {} @@ -72,7 +74,8 @@ class tempo_check : public check { _completion_timer.async_wait([me = shared_from_this(), this, check_running_index = _get_running_check_index()]( - const boost::system::error_code& err) { + [[maybe_unused]] const boost::system:: + error_code& err) { SPDLOG_TRACE("end of completion timer for serv {}", get_service()); me->on_completion( check_running_index, _command_exit_status, @@ -145,7 +148,9 @@ TEST_F(scheduler_test, no_config) { duration /* check interval */, const std::string& /*service*/, const std::string& /*cmd_name*/, const std::string& /*cmd_line*/, const engine_to_agent_request_ptr& /*engine to agent request*/, - check::completion_handler&&) { return std::shared_ptr(); }); + check::completion_handler&&, const checks_statistics::pointer&) { + return std::shared_ptr(); + }); std::weak_ptr weak_shed(sched); sched.reset(); @@ -188,11 +193,12 @@ TEST_F(scheduler_test, correct_schedule) { const std::string& service, const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& engine_to_agent_request, - check::completion_handler&& handler) { + check::completion_handler&& handler, + const checks_statistics::pointer& stat) { return std::make_shared( io_context, logger, start_expected, check_interval, service, cmd_name, cmd_line, engine_to_agent_request, 0, - std::chrono::milliseconds(50), std::move(handler)); + std::chrono::milliseconds(50), std::move(handler), stat); }); std::this_thread::sleep_for(std::chrono::milliseconds(10100)); @@ -261,11 +267,12 @@ TEST_F(scheduler_test, time_out) { const std::string& service, const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& engine_to_agent_request, - check::completion_handler&& handler) { + check::completion_handler&& handler, + const checks_statistics::pointer& stat) { return std::make_shared( io_context, logger, start_expected, check_interval, service, cmd_name, cmd_line, engine_to_agent_request, 0, - std::chrono::milliseconds(1500), std::move(handler)); + std::chrono::milliseconds(1500), std::move(handler), stat); }); std::unique_lock l(m); export_cond.wait(l); @@ -301,7 +308,6 @@ TEST_F(scheduler_test, time_out) { TEST_F(scheduler_test, correct_output_examplar) { std::shared_ptr exported_request; std::condition_variable export_cond; - time_point now = std::chrono::system_clock::now(); std::shared_ptr sched = scheduler::load( g_io_context, spdlog::default_logger(), "my_host", create_conf(2, 1, 2, 10, 1), @@ -315,11 +321,12 @@ TEST_F(scheduler_test, correct_output_examplar) { const std::string& service, const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& engine_to_agent_request, - check::completion_handler&& handler) { + check::completion_handler&& handler, + const checks_statistics::pointer& stat) { return std::make_shared( io_context, logger, start_expected, check_interval, service, cmd_name, cmd_line, engine_to_agent_request, 0, - std::chrono::milliseconds(10), std::move(handler)); + std::chrono::milliseconds(10), std::move(handler), stat); }); std::mutex m; std::unique_lock l(m); @@ -398,7 +405,8 @@ class concurent_check : public check { const engine_to_agent_request_ptr& cnf, int command_exit_status, duration completion_delay, - check::completion_handler&& handler) + check::completion_handler&& handler, + const checks_statistics::pointer& stat) : check(io_context, logger, exp, @@ -407,7 +415,8 @@ class concurent_check : public check { cmd_name, cmd_line, cnf, - std::move(handler)), + std::move(handler), + stat), _completion_timer(*io_context), _command_exit_status(command_exit_status), _completion_delay(completion_delay) {} @@ -424,7 +433,8 @@ class concurent_check : public check { _completion_timer.async_wait([me = shared_from_this(), this, check_running_index = _get_running_check_index()]( - const boost::system::error_code& err) { + [[maybe_unused]] const boost::system:: + error_code& err) { active_checks.erase(this); checked.insert(this); SPDLOG_TRACE("end of completion timer for serv {}", get_service()); @@ -448,14 +458,15 @@ TEST_F(scheduler_test, max_concurent) { std::shared_ptr sched = scheduler::load( g_io_context, spdlog::default_logger(), "my_host", create_conf(200, 10, 1, 10, 1), - [&](const std::shared_ptr& req) {}, + [&]([[maybe_unused]] const std::shared_ptr& req) {}, [](const std::shared_ptr& io_context, const std::shared_ptr& logger, time_point start_expected, duration check_interval, const std::string& service, const std::string& cmd_name, const std::string& cmd_line, const engine_to_agent_request_ptr& engine_to_agent_request, - check::completion_handler&& handler) { + check::completion_handler&& handler, + const checks_statistics::pointer& stat) { return std::make_shared( io_context, logger, start_expected, check_interval, service, cmd_name, cmd_line, engine_to_agent_request, 0, @@ -463,7 +474,7 @@ TEST_F(scheduler_test, max_concurent) { 10) /*the - 10 is for some delay in test execution from start expected*/ , - std::move(handler)); + std::move(handler), stat); }); // to many tests to be completed in eleven second diff --git a/tests/broker-engine/cma.robot b/tests/broker-engine/cma.robot index a7209a9852..9678c3538d 100644 --- a/tests/broker-engine/cma.robot +++ b/tests/broker-engine/cma.robot @@ -675,6 +675,74 @@ BEOTEL_CENTREON_AGENT_CHECK_NATIVE_SERVICE Should Be True ${result} resources table not updated +BEOTEL_CENTREON_AGENT_CHECK_HEALTH + [Documentation] agent check health and we expect to get it in check result + [Tags] broker engine opentelemetry MON-147934 + Ctn Config Engine ${1} ${2} ${2} + Ctn Add Otl ServerModule + ... 0 + ... {"otel_server":{"host": "0.0.0.0","port": 4317},"max_length_grpc_log":0,"centreon_agent":{"check_interval":10, "export_period":15}} + Ctn Config Add Otl Connector + ... 0 + ... OTEL connector + ... opentelemetry --processor=centreon_agent --extractor=attributes --host_path=resource_metrics.resource.attributes.host.name --service_path=resource_metrics.resource.attributes.service.name + Ctn Engine Config Replace Value In Services ${0} service_1 check_command cpu_check + Ctn Engine Config Replace Value In Services ${0} service_2 check_command health_check + Ctn Set Services Passive 0 service_[1-2] + + + Ctn Engine Config Add Command ${0} cpu_check {"check": "cpu_percentage"} OTEL connector + Ctn Engine Config Add Command ${0} health_check {"check": "health"} OTEL connector + Ctn Engine Config Add Command ${0} health_check_warning {"check": "health", "args":{"warning-interval": "5"} } OTEL connector + Ctn Engine Config Add Command ${0} health_check_critical {"check": "health", "args":{"warning-interval": "5", "critical-interval": "6"} } OTEL connector + + Ctn Engine Config Set Value 0 log_level_checks trace + + Ctn Clear Db metrics + + Ctn Config Broker central + Ctn Config Broker module + Ctn Config Broker rrd + Ctn Config Centreon Agent + Ctn Broker Config Log central sql trace + + Ctn Config BBDO3 1 + Ctn Clear Retention + + ${start} Ctn Get Round Current Date + Ctn Start Broker + Ctn Start Engine + Ctn Start Agent + + # Let's wait for the otel server start + Ctn Wait For Otel Server To Be Ready ${start} + + Log To Console service_1 and service_2 must be ok + ${result} Ctn Check Service Resource Status With Timeout host_1 service_1 0 120 HARD + Should Be True ${result} resources table not updated for service_1 + + ${result} Ctn Check Service Resource Status With Timeout host_1 service_2 0 60 HARD + Should Be True ${result} resources table not updated for service_2 + + ${metrics_list} Create List cpu.utilization.percentage 0#core.cpu.utilization.percentage + ${result} Ctn Compare Metrics Of Service 1 ${metrics_list} 30 + Should Be True ${result} cpu metrics not updated + + ${metrics_list} Create List runtime interval + ${result} Ctn Compare Metrics Of Service 2 ${metrics_list} 30 + Should Be True ${result} health metrics not updated + + Log To Console service_2 must be warning + Ctn Engine Config Replace Value In Services ${0} service_2 check_command health_check_warning + Ctn Reload Engine + ${result} Ctn Check Service Resource Status With Timeout host_1 service_2 1 60 ANY + Should Be True ${result} resources table not updated for service_2 + + Log To Console service_2 must be critical + Ctn Engine Config Replace Value In Services ${0} service_2 check_command health_check_critical + Ctn Reload Engine + ${result} Ctn Check Service Resource Status With Timeout host_1 service_2 2 60 ANY + Should Be True ${result} resources table not updated for service_2 *** Keywords ***