diff --git a/CMakeListsWindows.txt b/CMakeListsWindows.txt index a2bd9c12d3e..f24aa1dd494 100644 --- a/CMakeListsWindows.txt +++ b/CMakeListsWindows.txt @@ -16,6 +16,9 @@ # For more information : contact@centreon.com # +#in order to make fmt compile +add_definitions("/utf-8") + # When we build from cache (CI), we don't use vcpkg cmaketool, so we tell to cmake where to find packages info if (BUILD_FROM_CACHE) LIST(APPEND CMAKE_PREFIX_PATH "build_windows/vcpkg_installed/x64-windows") diff --git a/common/src/rapidjson_helper.cc b/common/src/rapidjson_helper.cc index 5663af59b64..2252cdc262f 100644 --- a/common/src/rapidjson_helper.cc +++ b/common/src/rapidjson_helper.cc @@ -345,7 +345,7 @@ const rapidjson::Value& rapidjson_helper::get_member( */ rapidjson::Document rapidjson_helper::read_from_file( const std::string_view& path) { - FILE* to_close = fopen(path.data(), "r+b"); + FILE* to_close = fopen(path.data(), "r"); if (!to_close) { throw exceptions::msg_fmt("Fail to read file '{}' : {}", path, strerror(errno)); diff --git a/common/tests/process_test.cc b/common/tests/process_test.cc index 660c35e0d48..81186e0318b 100644 --- a/common/tests/process_test.cc +++ b/common/tests/process_test.cc @@ -45,6 +45,7 @@ class process_test : public ::testing::Test { }; class process_wait : public process { + std::mutex _cond_m; std::condition_variable _cond; std::string _stdout; std::string _stderr; @@ -52,13 +53,14 @@ class process_wait : public process { bool _stderr_eof = false; bool _process_ended = false; - void _notify() { - if (_stdout_eof && _stderr_eof && _process_ended) { - _cond.notify_one(); - } + public: + void reset_end() { + std::lock_guard l(_cond_m); + _stdout_eof = false; + _stderr_eof = false; + _process_ended = false; } - public: void on_stdout_read(const boost::system::error_code& err, size_t nb_read) override { if (!err) { @@ -66,8 +68,10 @@ class process_wait : public process { _stdout += line; SPDLOG_LOGGER_DEBUG(_logger, "read from stdout: {}", line); } else if (err == asio::error::eof || err == asio::error::broken_pipe) { + std::unique_lock l(_cond_m); _stdout_eof = true; - _notify(); + l.unlock(); + _cond.notify_one(); } process::on_stdout_read(err, nb_read); } @@ -79,8 +83,10 @@ class process_wait : public process { _stderr += line; SPDLOG_LOGGER_DEBUG(_logger, "read from stderr: {}", line); } else if (err == asio::error::eof || err == asio::error::broken_pipe) { + std::unique_lock l(_cond_m); _stderr_eof = true; - _notify(); + l.unlock(); + _cond.notify_one(); } process::on_stderr_read(err, nb_read); } @@ -89,8 +95,10 @@ class process_wait : public process { int raw_exit_status) override { process::on_process_end(err, raw_exit_status); SPDLOG_LOGGER_DEBUG(_logger, "process end"); + std::unique_lock l(_cond_m); _process_ended = true; - _notify(); + l.unlock(); + _cond.notify_one(); } template @@ -109,9 +117,9 @@ class process_wait : public process { const std::string& get_stderr() const { return _stderr; } void wait() { - std::mutex dummy; - std::unique_lock l(dummy); - _cond.wait(l); + std::unique_lock l(_cond_m); + _cond.wait(l, + [this] { return _process_ended && _stderr_eof && _stdout_eof; }); } }; @@ -154,11 +162,11 @@ TEST_F(process_test, call_start_several_time) { new process_wait(g_io_context, _logger, ECHO_PATH, {"hello"})); std::string expected; for (int ii = 0; ii < 10; ++ii) { - std::this_thread::sleep_for(std::chrono::milliseconds(100)); + to_wait->reset_end(); to_wait->start_process(true); + to_wait->wait(); expected += "hello" END_OF_LINE; } - to_wait->wait(); ASSERT_EQ(to_wait->get_exit_status(), 0); ASSERT_EQ(to_wait->get_stdout(), expected); ASSERT_EQ(to_wait->get_stderr(), ""); @@ -169,11 +177,11 @@ TEST_F(process_test, call_start_several_time_no_args) { new process_wait(g_io_context, _logger, ECHO_PATH " hello")); std::string expected; for (int ii = 0; ii < 10; ++ii) { - std::this_thread::sleep_for(std::chrono::milliseconds(100)); + to_wait->reset_end(); to_wait->start_process(true); + to_wait->wait(); expected += "hello" END_OF_LINE; } - to_wait->wait(); std::this_thread::sleep_for(std::chrono::milliseconds(10)); ASSERT_EQ(to_wait->get_exit_status(), 0); ASSERT_EQ(to_wait->get_stdout(), expected); diff --git a/engine/inc/com/centreon/engine/check_result.hh b/engine/inc/com/centreon/engine/check_result.hh index 9d02da8ef3b..214fd08a82c 100644 --- a/engine/inc/com/centreon/engine/check_result.hh +++ b/engine/inc/com/centreon/engine/check_result.hh @@ -51,8 +51,6 @@ class check_result { return _object_check_type; } void set_object_check_type(enum check_source object_check_type); - uint64_t get_command_id() const { return _command_id; } - void set_command_id(uint64_t command_id) { _command_id = command_id; } inline notifier* get_notifier() { return _notifier; } void set_notifier(notifier* notifier); @@ -81,7 +79,6 @@ class check_result { private: enum check_source _object_check_type; // is this a service or a host check? - uint64_t _command_id; notifier* _notifier; // was this an active or passive service check? enum checkable::check_type _check_type; diff --git a/engine/inc/com/centreon/engine/checks/checker.hh b/engine/inc/com/centreon/engine/checks/checker.hh index 964a272f9cd..5419a86a248 100644 --- a/engine/inc/com/centreon/engine/checks/checker.hh +++ b/engine/inc/com/centreon/engine/checks/checker.hh @@ -23,9 +23,8 @@ #include "com/centreon/engine/anomalydetection.hh" #include "com/centreon/engine/commands/command.hh" -namespace com::centreon::engine { +namespace com::centreon::engine::checks { -namespace checks { /** * @class checks check_result.hh * @brief Run object and reap the result. @@ -57,6 +56,9 @@ class checker : public commands::command_listener { void wait_completion(e_completion_filter filter = e_completion_filter::all); + template + void inspect_reap_partial(queue_handler&& handler) const; + private: checker(bool used_by_test); checker(checker const& right); @@ -66,7 +68,7 @@ class checker : public commands::command_listener { host::host_state _execute_sync(host* hst); /* A mutex to protect access on _waiting_check_result and _to_reap_partial */ - std::mutex _mut_reap; + mutable std::mutex _mut_reap; /* * Here is the list of prepared check results but with a command being * running. When the command will be finished, each check result is get back @@ -92,8 +94,19 @@ class checker : public commands::command_listener { std::condition_variable _finish_cond; bool _finished; }; -} // namespace checks +/** + * @brief allow to inspect _to_reap_partial + * + * @tparam queue_handler + * @param handler must have () (const std::deque &) + */ +template +void checker::inspect_reap_partial(queue_handler&& handler) const { + std::lock_guard lock(_mut_reap); + handler(_to_reap_partial); } +} // namespace com::centreon::engine::checks + #endif // !CCE_CHECKS_CHECKER_HH diff --git a/engine/inc/com/centreon/engine/commands/otel_connector.hh b/engine/inc/com/centreon/engine/commands/otel_connector.hh index 7a21e4c4589..bb0925efe7d 100644 --- a/engine/inc/com/centreon/engine/commands/otel_connector.hh +++ b/engine/inc/com/centreon/engine/commands/otel_connector.hh @@ -31,8 +31,7 @@ namespace com::centreon::engine::commands { * open telemetry request run command line configure converter who converts * data_points to result */ -class otel_connector : public command, - public std::enable_shared_from_this { +class otel_connector : public command { otel::host_serv_list::pointer _host_serv_list; public: @@ -43,16 +42,17 @@ class otel_connector : public command, static otel_connector_container _commands; std::shared_ptr _extractor; - std::shared_ptr _conv_conf; + std::shared_ptr _check_result_builder; std::shared_ptr _logger; void init(); public: - static void create(const std::string& connector_name, - const std::string& cmd_line, - commands::command_listener* listener); + static std::shared_ptr create( + const std::string& connector_name, + const std::string& cmd_line, + commands::command_listener* listener); static bool remove(const std::string& connector_name); @@ -62,6 +62,10 @@ class otel_connector : public command, static std::shared_ptr get_otel_connector( const std::string& connector_name); + static std::shared_ptr get_otel_connector_from_host_serv( + const std::string_view& host, + const std::string_view& serv); + static void clear(); static void init_all(); @@ -76,6 +80,11 @@ class otel_connector : public command, void update(const std::string& cmd_line); + void process_data_pts( + const std::string_view& host, + const std::string_view& serv, + const modules::opentelemetry::metric_to_datapoints& data_pts); + virtual uint64_t run(const std::string& processed_cmd, nagios_macros& macros, uint32_t timeout, diff --git a/engine/inc/com/centreon/engine/commands/otel_interface.hh b/engine/inc/com/centreon/engine/commands/otel_interface.hh index 7c26706c86e..0e6a6b18704 100644 --- a/engine/inc/com/centreon/engine/commands/otel_interface.hh +++ b/engine/inc/com/centreon/engine/commands/otel_interface.hh @@ -22,6 +22,10 @@ #include "com/centreon/engine/commands/result.hh" #include "com/centreon/engine/macros/defines.hh" +namespace com::centreon::engine::modules::opentelemetry { +class metric_to_datapoints; +} + namespace com::centreon::engine::commands::otel { /** @@ -66,14 +70,34 @@ class host_serv_list { const std::string& service_description); void remove(const std::string& host, const std::string& service_description); - bool contains(const std::string& host, - const std::string& service_description) const; + template + bool contains(const string_type& host, + const string_type& service_description) const; template host_serv_metric match(const host_set& hosts, const service_set& services) const; }; +/** + * @brief test if a host serv pair is contained in list + * + * @param host + * @param service_description + * @return true found + * @return false not found + */ +template +bool host_serv_list::contains(const string_type& host, + const string_type& service_description) const { + absl::ReaderMutexLock l(&_data_m); + auto host_search = _data.find(host); + if (host_search != _data.end()) { + return host_search->second.contains(service_description); + } + return false; +} + template host_serv_metric host_serv_list::match(const host_set& hosts, const service_set& services) const { @@ -111,13 +135,15 @@ class host_serv_extractor { virtual ~host_serv_extractor() = default; }; -class check_result_builder_config { +class otl_check_result_builder_base { public: - virtual ~check_result_builder_config() = default; + virtual ~otl_check_result_builder_base() = default; + virtual void process_data_pts( + const std::string_view& host, + const std::string_view& serv, + const modules::opentelemetry::metric_to_datapoints& data_pts) = 0; }; -using result_callback = std::function; - class open_telemetry_base; /** @@ -139,17 +165,8 @@ class open_telemetry_base const std::string& cmdline, const host_serv_list::pointer& host_serv_list) = 0; - virtual std::shared_ptr - create_check_result_builder_config(const std::string& cmd_line) = 0; - - virtual bool check( - const std::string& processed_cmd, - const std::shared_ptr& conv_conf, - uint64_t command_id, - nagios_macros& macros, - uint32_t timeout, - commands::result& res, - result_callback&& handler) = 0; + virtual std::shared_ptr + create_check_result_builder(const std::string& cmdline) = 0; }; }; // namespace com::centreon::engine::commands::otel diff --git a/engine/inc/com/centreon/engine/service.hh b/engine/inc/com/centreon/engine/service.hh index c2c1e853c7f..a6b4d84ee44 100644 --- a/engine/inc/com/centreon/engine/service.hh +++ b/engine/inc/com/centreon/engine/service.hh @@ -38,12 +38,55 @@ class servicegroup; class serviceescalation; } // namespace com::centreon::engine +/** + * @brief pair with host_name in first and serv in second + * + */ +using host_serv_pair = std::pair; + +/** + * @brief This struct is used to lookup in a host_serv_pair indexed container + * with a std::pair + * + */ +struct host_serv_hash_eq { + using is_transparent = void; + using host_serv_string_view = std::pair; + + size_t operator()(const host_serv_pair& to_hash) const { + return absl::Hash()(to_hash); + } + size_t operator()(const host_serv_string_view& to_hash) const { + return absl::Hash()(to_hash); + } + + bool operator()(const host_serv_pair& left, + const host_serv_pair& right) const { + return left == right; + } + bool operator()(const host_serv_pair& left, + const host_serv_string_view& right) const { + return left.first == right.first && left.second == right.second; + } + bool operator()(const host_serv_string_view& left, + const host_serv_pair& right) const { + return left.first == right.first && left.second == right.second; + } + bool operator()(const host_serv_string_view& left, + const host_serv_string_view& right) const { + return left == right; + } +}; + using service_map = - absl::flat_hash_map, - std::shared_ptr>; -using service_map_unsafe = - absl::flat_hash_map, - com::centreon::engine::service*>; + absl::flat_hash_map, + host_serv_hash_eq, + host_serv_hash_eq>; +using service_map_unsafe = absl::flat_hash_map; using service_id_map = absl::btree_map, std::shared_ptr>; diff --git a/engine/modules/opentelemetry/CMakeLists.txt b/engine/modules/opentelemetry/CMakeLists.txt index acb843bb80b..5feec71f6f5 100644 --- a/engine/modules/opentelemetry/CMakeLists.txt +++ b/engine/modules/opentelemetry/CMakeLists.txt @@ -70,8 +70,6 @@ ${SRC_DIR}/centreon_agent/agent_impl.cc ${SRC_DIR}/centreon_agent/agent_reverse_client.cc ${SRC_DIR}/centreon_agent/agent_service.cc ${SRC_DIR}/centreon_agent/to_agent_connector.cc -${SRC_DIR}/data_point_fifo.cc -${SRC_DIR}/data_point_fifo_container.cc ${SRC_DIR}/grpc_config.cc ${SRC_DIR}/host_serv_extractor.cc ${SRC_DIR}/open_telemetry.cc diff --git a/engine/modules/opentelemetry/doc/opentelemetry.md b/engine/modules/opentelemetry/doc/opentelemetry.md index 379643f2889..73568749bb3 100644 --- a/engine/modules/opentelemetry/doc/opentelemetry.md +++ b/engine/modules/opentelemetry/doc/opentelemetry.md @@ -4,8 +4,8 @@ Engine can receive open telemetry data on a grpc server A new module is added opentelemetry It works like that: * metrics are received -* extractors tries to extract host name and service description for each otl_data_point. On success, otl_data_point are pushed on fifos indexed by host, service -* a service that used these datas wants to do a check. The cmd line identifies the otl_check_result_builder that will construct check result from host service otl_data_point fifos. If converter achieves to build a result from metrics, it returns right now, if it doesn't, a handler will be called as soon as needed metrics will be available or timeout expires. +* extractors tries to extract host name and service description for each otl_data_point. +* On success, it searches a check_result_builder used by the passive otel service. Then the check_result_builder converts otl_data_point in check_result and update passive service. ### open telemetry request The proto is organized like that @@ -115,11 +115,9 @@ The proto is organized like that ### Concepts and classes * otl_data_point: otl_data_point is the smallest unit of received request, otl_data_point class contains otl_data_point protobuf object and all his parents (resource, scope, metric) * host serv extractors: When we receive otel metrics, we must extract host and service, this is his job. It can be configurable in order for example to search host name in otl_data_point attribute or in scope. host serv extractors also contains host serv allowed. This list is updated by register_host_serv command method -* otl_data_point fifo: a container that contains data points indexed by timestamp -* otl_data_point fifo container: fifos indexed by host service * otel_connector: a fake connector that is used to make the link between engine and otel module * otl_server: a grpc server that accept otel collector incoming connections -* otl_check_result_builder: This short lived object is created each time engine wants to do a check. His final class as his configuration is done from the command line of the check. His job is to create a check result from otl_data_point fifo container datas. It's destroyed when he achieved to create a check result or when timeout expires. +* otl_check_result_builder: His final class as his configuration is done from the command line of the check. His job is to create a check result from otl_data_point. * host_serv_list: in order to extract host and service, an host_serv extractor must known allowed host service pairs. As otel_connector may be notified of host service using it by register_host_serv method while otel module is not yet loaded. This object shared between otel_connector and host_serv_extractor is actualized from otel_connector::register_host_serv. ### How engine access to otl object @@ -128,16 +126,9 @@ Object used by both otel module and engine are inherited from these interfaces. Engine only knows a singleton of the interface open_telemetry_base. This singleton is initialized at otl module loading. ### How to configure it -We use a fake connector. When configuration is loaded, if a connector command line begins with "open_telemetry", we create an otel_connector. Arguments following "open_telemetry" are used to create an host service extractor. If otel module is loaded, we create extractor, otherwise, the otel_connector initialization will be done at otel module loading. -So user has to build one connector by host serv extractor configuration. -Then commands can use these fake connectors (class otel_connector) to run checks. - -### How a service do a check -When otel_connector::run is called, it calls the check method of open_telemetry singleton. -The check method of open_telemetry object will use command line passed to run to create an otl_check_result_builder object that has to convert metrics to check result. -The open_telemetry call sync_build_result_from_metrics, if it can't achieve to build a result, otl_check_result_builder is stored in a container. -When a metric of a waiting service is received, async_build_result_from_metrics of otl_check_result_builder is called. -In open_telemetry object, a second timer is also used to call async_time_out of otl_check_result_builder on timeout expire. +We use a fake connector. When configuration is loaded, if a connector command line begins with "open_telemetry", we create an otel_connector. Arguments following "open_telemetry" are used to create an host service extractor and a check_result_builder. If otel module is loaded, we create extractor, otherwise, the otel_connector initialization will be done at otel module loading. +So user has to build one connector by host serv extractor, check_result_builder configuration. +Then received otel data_points will be converted in check_result. ### other configuration other configuration parameters are stored in a dedicated json file. The path of this file is passed as argument in centengine.cfg diff --git a/engine/modules/opentelemetry/doc/otel_configuration.odg b/engine/modules/opentelemetry/doc/otel_configuration.odg new file mode 100644 index 00000000000..c14e698328b Binary files /dev/null and b/engine/modules/opentelemetry/doc/otel_configuration.odg differ diff --git a/engine/modules/opentelemetry/doc/otel_configuration.pdf b/engine/modules/opentelemetry/doc/otel_configuration.pdf new file mode 100644 index 00000000000..3bff7928002 Binary files /dev/null and b/engine/modules/opentelemetry/doc/otel_configuration.pdf differ diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_check_result_builder.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_check_result_builder.hh index adcee312878..c2b403979a0 100644 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_check_result_builder.hh +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/centreon_agent/agent_check_result_builder.hh @@ -91,25 +91,13 @@ namespace com::centreon::engine::modules::opentelemetry::centreon_agent { * */ class agent_check_result_builder : public otl_check_result_builder { - protected: - bool _build_result_from_metrics(metric_name_to_fifo& fifos, - commands::result& res) override; - public: agent_check_result_builder(const std::string& cmd_line, - uint64_t command_id, - const host& host, - const service* service, - std::chrono::system_clock::time_point timeout, - commands::otel::result_callback&& handler, const std::shared_ptr& logger) - : otl_check_result_builder(cmd_line, - command_id, - host, - service, - timeout, - std::move(handler), - logger) {} + : otl_check_result_builder(cmd_line, logger) {} + + bool build_result_from_metrics(const metric_to_datapoints& data_pts, + check_result& res) override; }; } // namespace com::centreon::engine::modules::opentelemetry::centreon_agent diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/data_point_fifo.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/data_point_fifo.hh deleted file mode 100644 index bf78b223b7b..00000000000 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/data_point_fifo.hh +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Copyright 2024 Centreon - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * For more information : contact@centreon.com - */ -#ifndef CCE_MOD_OTL_SERVER_DATA_POINT_FIFO_HH -#define CCE_MOD_OTL_SERVER_DATA_POINT_FIFO_HH - -#include "otl_data_point.hh" - -namespace com::centreon::engine::modules::opentelemetry { - -/** - * @brief This class is a multiset of opentelemetry otl_data_point ordered by - * nano_timestamp - * - */ -class data_point_fifo { - struct time_unix_nano_compare { - /** - * @brief mandatory for heterogenous search (abseil or standard associative - * (C++20)) - * https://en.cppreference.com/w/cpp/utility/functional - * - */ - using is_transparent = void; - bool operator()(const otl_data_point& left, - const otl_data_point& right) const { - return left.get_nano_timestamp() < right.get_nano_timestamp(); - } - bool operator()(const otl_data_point& left, - uint64_t nano_timestamp_right) const { - return left.get_nano_timestamp() < nano_timestamp_right; - } - bool operator()(uint64_t nano_timestamp_left, - const otl_data_point& right) const { - return nano_timestamp_left < right.get_nano_timestamp(); - } - }; - - public: - using container = - absl::btree_multiset; - - private: - static time_t _second_datapoint_expiry; - static size_t _max_size; - - container _fifo; - - public: - const container& get_fifo() const { return _fifo; } - - bool empty() const { return _fifo.empty(); } - - void clear() { _fifo.clear(); } - - size_t size() const { return _fifo.size(); } - - void add_data_point(const otl_data_point& data_pt); - - void clean(); - - void clean_oldest(uint64_t expiry); - - static void update_fifo_limit(time_t second_datapoint_expiry, - size_t max_size); -}; - -using metric_name_to_fifo = absl::flat_hash_map; - -} // namespace com::centreon::engine::modules::opentelemetry - -#endif diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/data_point_fifo_container.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/data_point_fifo_container.hh deleted file mode 100644 index 7406ea65648..00000000000 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/data_point_fifo_container.hh +++ /dev/null @@ -1,94 +0,0 @@ -/** - * Copyright 2024 Centreon - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * - * http://www.apache.org/licenses/LICENSE-2.0 * You may obtain a copy of the - License at - - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * For more information : contact@centreon.com - */ -#ifndef CCE_MOD_OTL_SERVER_DATA_POINT_FIFO_CONTAINER_HH -#define CCE_MOD_OTL_SERVER_DATA_POINT_FIFO_CONTAINER_HH - -#include "data_point_fifo.hh" - -namespace com::centreon::engine::modules::opentelemetry { - -/** - * @brief This class is a - * map host_serv -> map metric -> data_point_fifo (list of data_points) - * - */ -class data_point_fifo_container { - public: - private: - /** - * @brief - * metrics are ordered like this: - * => metric1 => data_points list - * => metric2 => data_points list - * - */ - using host_serv_to_metrics = absl::flat_hash_map; - - host_serv_to_metrics _data; - - static metric_name_to_fifo _empty; - - std::mutex _data_m; - - public: - void clean(); - - static void clean_empty_fifos(metric_name_to_fifo& to_clean); - - void add_data_point(const std::string_view& host, - const std::string_view& service, - const std::string_view& metric, - const otl_data_point& data_pt); - - const metric_name_to_fifo& get_fifos(const std::string& host, - const std::string& service) const; - - metric_name_to_fifo& get_fifos(const std::string& host, - const std::string& service); - - void lock() { _data_m.lock(); } - - void unlock() { _data_m.unlock(); } - - void dump(std::string& output) const; -}; - -} // namespace com::centreon::engine::modules::opentelemetry - -namespace fmt { -template <> -struct formatter< - com::centreon::engine::modules::opentelemetry::data_point_fifo_container> - : formatter { - template - auto format(const com::centreon::engine::modules::opentelemetry:: - data_point_fifo_container& cont, - FormatContext& ctx) const -> decltype(ctx.out()) { - std::string output; - cont.dump(output); - return formatter::format(output, ctx); - } -}; - -} // namespace fmt - -#endif diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/open_telemetry.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/open_telemetry.hh index aa601e0c951..b30ba4664b3 100644 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/open_telemetry.hh +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/open_telemetry.hh @@ -23,7 +23,6 @@ #include "com/centreon/engine/commands/otel_interface.hh" #include "centreon_agent/agent_reverse_client.hh" -#include "data_point_fifo_container.hh" #include "host_serv_extractor.hh" #include "otl_check_result_builder.hh" #include "otl_config.hh" @@ -46,7 +45,6 @@ class otl_server; * */ class open_telemetry : public commands::otel::open_telemetry_base { - asio::system_timer _second_timer; std::shared_ptr _otl_server; std::shared_ptr _telegraf_conf_server; std::unique_ptr _agent_reverse_client; @@ -54,48 +52,15 @@ class open_telemetry : public commands::otel::open_telemetry_base { using cmd_line_to_extractor_map = absl::btree_map>; cmd_line_to_extractor_map _extractors; - data_point_fifo_container _fifo; std::string _config_file_path; std::unique_ptr _conf; std::shared_ptr _logger; - struct host_serv_getter { - using result_type = host_serv; - const result_type& operator()( - const std::shared_ptr& node) const { - return node->get_host_serv(); - } - }; - - struct time_out_getter { - using result_type = std::chrono::system_clock::time_point; - result_type operator()( - const std::shared_ptr& node) const { - return node->get_time_out(); - } - }; - - /** - * @brief when check can't return data right now, we have no metrics in fifo, - * converter is stored in this container. It's indexed by host,serv and by - * timeout - * - */ - using waiting_converter = boost::multi_index::multi_index_container< - std::shared_ptr, - boost::multi_index::indexed_by< - boost::multi_index::hashed_non_unique, - boost::multi_index::ordered_non_unique>>; - - waiting_converter _waiting; - std::shared_ptr _io_context; mutable std::mutex _protect; void _forward_to_broker(const std::vector& unknown); - void _second_timer_handler(); - void _create_telegraf_conf_server( const telegraf::conf_server_config::pointer& conf); @@ -103,9 +68,7 @@ class open_telemetry : public commands::otel::open_telemetry_base { virtual void _create_otl_server( const grpc_config::pointer& server_conf, const centreon_agent::agent_config::pointer& agent_conf); - void _on_metric(const metric_request_ptr& metric); void _reload(); - void _start_second_timer(); void _shutdown(); public: @@ -131,21 +94,14 @@ class open_telemetry : public commands::otel::open_telemetry_base { static void unload(const std::shared_ptr& logger); - bool check(const std::string& processed_cmd, - const std::shared_ptr& - conv_conf, - uint64_t command_id, - nagios_macros& macros, - uint32_t timeout, - commands::result& res, - commands::otel::result_callback&& handler) override; + void on_metric(const metric_request_ptr& metric); std::shared_ptr create_extractor( const std::string& cmdline, const commands::otel::host_serv_list::pointer& host_serv_list) override; - std::shared_ptr - create_check_result_builder_config(const std::string& cmd_line) override; + std::shared_ptr + create_check_result_builder(const std::string& cmdline) override; }; } // namespace com::centreon::engine::modules::opentelemetry diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_check_result_builder.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_check_result_builder.hh index 71b44670c3a..1f2f32a8b6e 100644 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_check_result_builder.hh +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_check_result_builder.hh @@ -19,35 +19,40 @@ #ifndef CCE_MOD_OTL_CHECK_RESULT_BUILDER_HH #define CCE_MOD_OTL_CHECK_RESULT_BUILDER_HH +#include "com/centreon/engine/check_result.hh" + #include "com/centreon/engine/commands/otel_interface.hh" -#include "data_point_fifo.hh" +#include "otl_data_point.hh" namespace com::centreon::engine::modules::opentelemetry { -class data_point_fifo_container; - /** - * @brief converter are asynchronous object created on each check - * In order to not parse command line on each check, we parse it once and then - * create a converter config that will be used to create converter + * @brief compare data_points with nano_timestamp * */ -class check_result_builder_config - : public commands::otel::check_result_builder_config { - public: - enum class converter_type { - nagios_check_result_builder, - centreon_agent_check_result_builder - }; +struct otl_data_point_pointer_compare { + using is_transparent = void; - private: - const converter_type _type; + bool operator()(const otl_data_point& left, + const otl_data_point& right) const { + return left.get_nano_timestamp() < right.get_nano_timestamp(); + } - public: - check_result_builder_config(converter_type conv_type) : _type(conv_type) {} - converter_type get_type() const { return _type; } + bool operator()(const otl_data_point& left, uint64_t right) const { + return left.get_nano_timestamp() < right; + } + + bool operator()(uint64_t left, const otl_data_point& right) const { + return left < right.get_nano_timestamp(); + } }; +class metric_to_datapoints + : public absl::flat_hash_map< + std::string_view, + absl::btree_multiset> {}; + /** * @brief The goal of this converter is to convert otel metrics in result * This object is synchronous and asynchronous @@ -57,67 +62,32 @@ class check_result_builder_config * */ class otl_check_result_builder - : public std::enable_shared_from_this { + : public commands::otel::otl_check_result_builder_base { const std::string _cmd_line; - const uint64_t _command_id; - const std::pair _host_serv; - const std::chrono::system_clock::time_point _timeout; - const commands::otel::result_callback _callback; protected: std::shared_ptr _logger; - virtual bool _build_result_from_metrics(metric_name_to_fifo&, - commands::result& res) = 0; - public: otl_check_result_builder(const std::string& cmd_line, - uint64_t command_id, - const host& host, - const service* service, - std::chrono::system_clock::time_point timeout, - commands::otel::result_callback&& handler, const std::shared_ptr& logger); virtual ~otl_check_result_builder() = default; const std::string& get_cmd_line() const { return _cmd_line; } - uint64_t get_command_id() const { return _command_id; } - - const std::string& get_host_name() const { return _host_serv.first; } - const std::string& get_service_description() const { - return _host_serv.second; - } - - const std::pair& get_host_serv() const { - return _host_serv; - } - - std::chrono::system_clock::time_point get_time_out() const { - return _timeout; - } - - bool sync_build_result_from_metrics(data_point_fifo_container& data_pts, - commands::result& res); - - bool async_build_result_from_metrics(data_point_fifo_container& data_pts); - void async_time_out(); - virtual void dump(std::string& output) const; + void process_data_pts(const std::string_view& host, + const std::string_view& serv, + const metric_to_datapoints& data_pts) override; + static std::shared_ptr create( const std::string& cmd_line, - const std::shared_ptr& conf, - uint64_t command_id, - const host& host, - const service* service, - std::chrono::system_clock::time_point timeout, - commands::otel::result_callback&& handler, const std::shared_ptr& logger); - static std::shared_ptr - create_check_result_builder_config(const std::string& cmd_line); + virtual bool build_result_from_metrics(const metric_to_datapoints& data_pts, + check_result& res) = 0; }; } // namespace com::centreon::engine::modules::opentelemetry diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_config.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_config.hh index 5b87b0db2fb..6b124c4276c 100644 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_config.hh +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_config.hh @@ -34,12 +34,6 @@ class otl_config { bool _json_grpc_log = false; // if true, otel object are logged in json // format instead of protobuf debug format - // this two attributes are limits used by otel otl_data_point fifos - // if fifo size exceed _max_fifo_size, oldest data_points are removed - // Also, data_points older than _second_fifo_expiry are removed from fifos - unsigned _second_fifo_expiry; - size_t _max_fifo_size; - public: otl_config(const std::string_view& file_path, asio::io_context& io_context); @@ -56,9 +50,6 @@ class otl_config { int get_max_length_grpc_log() const { return _max_length_grpc_log; } bool get_json_grpc_log() const { return _json_grpc_log; } - unsigned get_second_fifo_expiry() const { return _second_fifo_expiry; } - size_t get_max_fifo_size() const { return _max_fifo_size; } - bool operator==(const otl_config& right) const; inline bool operator!=(const otl_config& right) const { diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_data_point.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_data_point.hh index bad1bc2236e..76c79038413 100644 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_data_point.hh +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/otl_data_point.hh @@ -45,45 +45,6 @@ struct initialized_data_class : public data_class { } }; -/** - * @brief pair with host_name in first and serv in second - * - */ -using host_serv = std::pair; - -/** - * @brief This struct is used to lookup in a host_serv indexed container with - * a std::pair - * - */ -struct host_serv_hash_eq { - using is_transparent = void; - using host_serv_string_view = std::pair; - - size_t operator()(const host_serv& to_hash) const { - return absl::Hash()(to_hash); - } - size_t operator()(const host_serv_string_view& to_hash) const { - return absl::Hash()(to_hash); - } - - bool operator()(const host_serv& left, const host_serv& right) const { - return left == right; - } - bool operator()(const host_serv& left, - const host_serv_string_view& right) const { - return left.first == right.first && left.second == right.second; - } - bool operator()(const host_serv_string_view& left, - const host_serv& right) const { - return left.first == right.first && left.second == right.second; - } - bool operator()(const host_serv_string_view& left, - const host_serv_string_view& right) const { - return left == right; - } -}; - using metric_request_ptr = std::shared_ptr<::opentelemetry::proto::collector::metrics::v1:: ExportMetricsServiceRequest>; @@ -122,6 +83,7 @@ class otl_data_point { ::opentelemetry::proto::common::v1::KeyValue>& _data_point_attributes; const ::google::protobuf::RepeatedPtrField< ::opentelemetry::proto::metrics::v1::Exemplar>& _exemplars; + uint64_t _start_nano_timestamp; uint64_t _nano_timestamp; data_point_type _type; double _value; @@ -173,6 +135,7 @@ class otl_data_point { return _data_point; } + uint64_t get_start_nano_timestamp() const { return _start_nano_timestamp; } uint64_t get_nano_timestamp() const { return _nano_timestamp; } data_point_type get_type() { return _type; } diff --git a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/telegraf/nagios_check_result_builder.hh b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/telegraf/nagios_check_result_builder.hh index 77bcd34b533..f07a618e6ba 100644 --- a/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/telegraf/nagios_check_result_builder.hh +++ b/engine/modules/opentelemetry/inc/com/centreon/engine/modules/opentelemetry/telegraf/nagios_check_result_builder.hh @@ -92,25 +92,13 @@ namespace com::centreon::engine::modules::opentelemetry::telegraf { * */ class nagios_check_result_builder : public otl_check_result_builder { - protected: - bool _build_result_from_metrics(metric_name_to_fifo& fifos, - commands::result& res) override; - public: nagios_check_result_builder(const std::string& cmd_line, - uint64_t command_id, - const host& host, - const service* service, - std::chrono::system_clock::time_point timeout, - commands::otel::result_callback&& handler, const std::shared_ptr& logger) - : otl_check_result_builder(cmd_line, - command_id, - host, - service, - timeout, - std::move(handler), - logger) {} + : otl_check_result_builder(cmd_line, logger) {} + + bool build_result_from_metrics(const metric_to_datapoints& data_pts, + check_result& res) override; }; } // namespace com::centreon::engine::modules::opentelemetry::telegraf diff --git a/engine/modules/opentelemetry/src/centreon_agent/agent_check_result_builder.cc b/engine/modules/opentelemetry/src/centreon_agent/agent_check_result_builder.cc index 769869ea12e..4eb528115f6 100644 --- a/engine/modules/opentelemetry/src/centreon_agent/agent_check_result_builder.cc +++ b/engine/modules/opentelemetry/src/centreon_agent/agent_check_result_builder.cc @@ -16,8 +16,6 @@ * For more information : contact@centreon.com */ -#include "data_point_fifo_container.hh" - #include "otl_check_result_builder.hh" #include "centreon_agent/agent_check_result_builder.hh" @@ -127,59 +125,60 @@ void perf_data::append_to_string(std::string* to_append) { * @return true * @return false */ -bool agent_check_result_builder::_build_result_from_metrics( - metric_name_to_fifo& fifos, - commands::result& res) { +bool agent_check_result_builder::build_result_from_metrics( + const metric_to_datapoints& data_pts, + check_result& res) { // first we search last state timestamp from status uint64_t last_time = 0; - for (auto& metric_to_fifo : fifos) { - if (metric_to_fifo.first == "status") { - auto& fifo = metric_to_fifo.second.get_fifo(); - if (!fifo.empty()) { - const auto& last_sample = *fifo.rbegin(); - last_time = last_sample.get_nano_timestamp(); - res.exit_code = last_sample.get_value(); - // output of plugins is stored in description metric field - res.output = last_sample.get_metric().description(); - metric_to_fifo.second.clean_oldest(last_time); - } - break; - } - } - if (!last_time) { + auto status_metric = data_pts.find("status"); + if (status_metric == data_pts.end()) { return false; } - res.command_id = get_command_id(); - res.exit_status = process::normal; - res.end_time = res.start_time = - timestamp(last_time / 1000000000, (last_time / 1000) % 1000000); + const auto& last_sample = status_metric->second.rbegin(); + last_time = last_sample->get_nano_timestamp(); + res.set_return_code(last_sample->get_value()); + + // output of plugins is stored in description metric field + std::string output = last_sample->get_metric().description(); + + res.set_finish_time( + {.tv_sec = static_cast(last_time / 1000000000), + .tv_usec = static_cast((last_time / 1000) % 1000000)}); + + if (last_sample->get_start_nano_timestamp() > 0) { + res.set_start_time( + {.tv_sec = static_cast(last_sample->get_start_nano_timestamp() / + 1000000000), + .tv_usec = static_cast( + (last_sample->get_start_nano_timestamp() / 1000) % 1000000)}); + } else { + res.set_start_time(res.get_finish_time()); + } - res.output.push_back('|'); + output.push_back('|'); - for (auto& metric_to_fifo : fifos) { - if (metric_to_fifo.first == "status") + for (const auto& metric_to_data_pt : data_pts) { + if (metric_to_data_pt.first == "status") continue; - auto& fifo = metric_to_fifo.second.get_fifo(); - auto data_pt_search = fifo.find(last_time); - if (data_pt_search != fifo.end()) { - res.output.push_back(' '); + auto data_pt_search = metric_to_data_pt.second.find(last_time); + if (data_pt_search != metric_to_data_pt.second.end()) { + output.push_back(' '); const otl_data_point& data_pt = *data_pt_search; - absl::StrAppend(&res.output, metric_to_fifo.first, "=", + absl::StrAppend(&output, metric_to_data_pt.first, "=", data_pt.get_value(), data_pt.get_metric().unit(), ";"); - // all other metric value (warning_lt, critical_gt, min... are stored in - // exemplars) + // all other metric value (warning_lt, critical_gt, min... are stored + // in exemplars) detail::perf_data to_append; for (const auto& exemplar : data_pt.get_exemplars()) { to_append.apply_exemplar(exemplar); } - to_append.append_to_string(&res.output); + to_append.append_to_string(&output); } - metric_to_fifo.second.clean_oldest(last_time); } - data_point_fifo_container::clean_empty_fifos(fifos); + res.set_output(output); return true; } diff --git a/engine/modules/opentelemetry/src/centreon_agent/agent_config.cc b/engine/modules/opentelemetry/src/centreon_agent/agent_config.cc index d5cbce16780..0d49927f5c7 100644 --- a/engine/modules/opentelemetry/src/centreon_agent/agent_config.cc +++ b/engine/modules/opentelemetry/src/centreon_agent/agent_config.cc @@ -78,7 +78,7 @@ agent_config::agent_config(const rapidjson::Value& json_config_v) { _max_concurrent_checks = file_content.get_unsigned("max_concurrent_checks", 100); _export_period = file_content.get_unsigned("export_period", 60); - _check_timeout = file_content.get_unsigned("_check_timeout", 30); + _check_timeout = file_content.get_unsigned("check_timeout", 30); if (file_content.has_member("reverse_connections")) { const auto& reverse_array = file_content.get_member("reverse_connections"); diff --git a/engine/modules/opentelemetry/src/data_point_fifo.cc b/engine/modules/opentelemetry/src/data_point_fifo.cc deleted file mode 100644 index 00e4bec9d58..00000000000 --- a/engine/modules/opentelemetry/src/data_point_fifo.cc +++ /dev/null @@ -1,81 +0,0 @@ -/** - * Copyright 2024 Centreon - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * For more information : contact@centreon.com - */ - -#include "data_point_fifo.hh" - -using namespace com::centreon::engine::modules::opentelemetry; - -time_t data_point_fifo::_second_datapoint_expiry = 600; -size_t data_point_fifo::_max_size = 2; - -/** - * @brief opentelemetry fifo limits share a same value - * The goal of this isto fix these limits - * - * @param second_datapoint_expiry - * @param max_size - */ -void data_point_fifo::update_fifo_limit(time_t second_datapoint_expiry, - size_t max_size) { - _second_datapoint_expiry = second_datapoint_expiry; - _max_size = max_size; -} - -/** - * @brief add one data point to fifo - * - * @param data_pt - */ -void data_point_fifo::add_data_point(const otl_data_point& data_pt) { - clean(); - _fifo.insert(data_pt); -} - -/** - * @brief erase to older data points - * - */ -void data_point_fifo::clean() { - if (!_fifo.empty()) { - auto first = _fifo.begin(); - time_t expiry = time(nullptr) - _second_datapoint_expiry; - if (expiry < 0) { - expiry = 0; - } - - while (!_fifo.empty() && - first->get_nano_timestamp() / 1000000000 < expiry) { - first = _fifo.erase(first); - } - - if (_fifo.size() >= _max_size) { - _fifo.erase(first); - } - } -} - -/** - * @brief erase oldest element - * - * @param expiry data points older than this nano timestamp are erased - */ -void data_point_fifo::clean_oldest(uint64_t expiry) { - while (!_fifo.empty() && _fifo.begin()->get_nano_timestamp() < expiry) { - _fifo.erase(_fifo.begin()); - } -} diff --git a/engine/modules/opentelemetry/src/data_point_fifo_container.cc b/engine/modules/opentelemetry/src/data_point_fifo_container.cc deleted file mode 100644 index 112ffb271d5..00000000000 --- a/engine/modules/opentelemetry/src/data_point_fifo_container.cc +++ /dev/null @@ -1,135 +0,0 @@ -/** - * Copyright 2024 Centreon - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * For more information : contact@centreon.com - */ - -#include "data_point_fifo_container.hh" - -using namespace com::centreon::engine::modules::opentelemetry; - -metric_name_to_fifo data_point_fifo_container::_empty; - -/** - * @brief clean olds data_points - * no need to lock mutex - */ -void data_point_fifo_container::clean() { - std::lock_guard l(_data_m); - for (auto serv_to_fifo_iter = _data.begin(); - !_data.empty() && serv_to_fifo_iter != _data.end();) { - for (auto& fifo : serv_to_fifo_iter->second) { - fifo.second.clean(); - } - if (serv_to_fifo_iter->second.empty()) { - auto to_erase = serv_to_fifo_iter++; - _data.erase(to_erase); - } else { - ++serv_to_fifo_iter; - } - } -} - -/** - * @brief erase empty fifos - * mutex of the owner of to_clean must be locked before call - * - * @param to_clean map metric_name -> fifos - */ -void data_point_fifo_container::clean_empty_fifos( - metric_name_to_fifo& to_clean) { - for (auto to_clean_iter = to_clean.begin(); - !to_clean.empty() && to_clean_iter != to_clean.end();) { - if (to_clean_iter->second.empty()) { - auto to_erase = to_clean_iter++; - to_clean.erase(to_erase); - } else { - ++to_clean_iter; - } - } -} - -/** - * @brief add a data point in the corresponding fifo - * mutex must be locked during returned data use - * - * @param data_pt otl_data_point to add - */ -void data_point_fifo_container::add_data_point(const std::string_view& host, - const std::string_view& service, - const std::string_view& metric, - const otl_data_point& data_pt) { - metric_name_to_fifo& fifos = _data[std::make_pair(host, service)]; - auto exist = fifos.find(metric); - if (exist == fifos.end()) { - exist = fifos.emplace(metric, data_point_fifo()).first; - } - exist->second.add_data_point(data_pt); -} - -/** - * @brief get all fifos of a service - * mutex must be locked during returned data use - * - * @param host - * @param service - * @return const metric_name_to_fifo& - */ -const metric_name_to_fifo& data_point_fifo_container::get_fifos( - const std::string& host, - const std::string& service) const { - auto exist = _data.find({host, service}); - return exist == _data.end() ? _empty : exist->second; -} - -/** - * @brief get all fifos of a service - * mutex must be locked during returned data use - * - * @param host - * @param service - * @return metric_name_to_fifo& - */ -metric_name_to_fifo& data_point_fifo_container::get_fifos( - const std::string& host, - const std::string& service) { - auto exist = _data.find({host, service}); - return exist == _data.end() ? _empty : exist->second; -} - -/** - * @brief debug output - * - * @param output string to log - */ -void data_point_fifo_container::dump(std::string& output) const { - output.push_back('{'); - for (const auto& host_serv : _data) { - output.push_back('"'); - output.append(host_serv.first.first); - output.push_back(','); - output.append(host_serv.first.second); - output.append("\":{"); - for (const auto& metric_to_fifo : host_serv.second) { - output.push_back('"'); - output.append(metric_to_fifo.first); - output.append("\":"); - absl::StrAppend(&output, metric_to_fifo.second.size()); - output.push_back(','); - } - output.append("},"); - } - output.push_back('}'); -} \ No newline at end of file diff --git a/engine/modules/opentelemetry/src/open_telemetry.cc b/engine/modules/opentelemetry/src/open_telemetry.cc index 98707492915..776cecfba11 100644 --- a/engine/modules/opentelemetry/src/open_telemetry.cc +++ b/engine/modules/opentelemetry/src/open_telemetry.cc @@ -20,8 +20,15 @@ #include "centreon_agent/agent_impl.hh" #include "com/centreon/common/http/https_connection.hh" -#include "com/centreon/engine/modules/opentelemetry/open_telemetry.hh" +#include "com/centreon/engine/host.hh" +#include "com/centreon/engine/service.hh" + +#include "com/centreon/engine/command_manager.hh" + +#include "open_telemetry.hh" + +#include "com/centreon/engine/commands/otel_connector.hh" #include "otl_fmt.hh" #include "otl_server.hh" @@ -37,8 +44,7 @@ open_telemetry::open_telemetry( const std::string_view config_file_path, const std::shared_ptr& io_context, const std::shared_ptr& logger) - : _second_timer(*io_context), - _config_file_path(config_file_path), + : _config_file_path(config_file_path), _logger(logger), _io_context(io_context) { SPDLOG_LOGGER_INFO(_logger, "load of open telemetry module"); @@ -88,8 +94,6 @@ void open_telemetry::_reload() { fmt::formatter<::opentelemetry::proto::collector::metrics::v1:: ExportMetricsServiceRequest>::json_grpc_format = new_conf->get_json_grpc_log(); - data_point_fifo::update_fifo_limit(new_conf->get_second_fifo_expiry(), - new_conf->get_max_fifo_size()); _conf = std::move(new_conf); @@ -98,7 +102,7 @@ void open_telemetry::_reload() { std::make_unique( _io_context, [me = shared_from_this()](const metric_request_ptr& request) { - me->_on_metric(request); + me->on_metric(request); }, _logger); } @@ -131,7 +135,6 @@ std::shared_ptr open_telemetry::load( _instance = std::make_shared(config_path, io_context, logger); instance()->_reload(); - instance()->_start_second_timer(); } return instance(); } @@ -152,7 +155,7 @@ void open_telemetry::_create_otl_server( _otl_server = otl_server::load( _io_context, server_conf, agent_conf, [me = shared_from_this()](const metric_request_ptr& request) { - me->_on_metric(request); + me->on_metric(request); }, _logger); } catch (const std::exception& e) { @@ -246,8 +249,6 @@ void open_telemetry::_shutdown() { if (to_shutdown) { to_shutdown->shutdown(std::chrono::seconds(10)); } - std::lock_guard l(_protect); - _second_timer.cancel(); } /** @@ -300,80 +301,10 @@ open_telemetry::create_extractor( } } -/** - * @brief converter is created for each check, so in order to not parse otel - * connector command line on each check , we create a - * check_result_builder_config object that is used to create converter it search - * the flag extractor - * - * @param cmd_line - * @return - * std::shared_ptr - */ std::shared_ptr< - com::centreon::engine::commands::otel::check_result_builder_config> -open_telemetry::create_check_result_builder_config( - const std::string& cmd_line) { - return otl_check_result_builder::create_check_result_builder_config(cmd_line); -} - -/** - * @brief simulate a check by reading in metrics fifos - * It creates an otel_converter, the first word of processed_cmd is the name - * of converter such as nagios_telegraf. Following parameters are used by - * converter - * - * @param processed_cmd converter type with arguments - * @param command_id command id - * @param macros - * @param timeout - * @param res filled if it returns true - * @param handler called later if it returns false - * @return true res is filled with a result - * @return false result will be passed to handler as soon as available or - * timeout - * @throw if converter type is unknown - */ -bool open_telemetry::check( - const std::string& processed_cmd, - const std::shared_ptr& - conv_config, - uint64_t command_id, - nagios_macros& macros, - uint32_t timeout, - commands::result& res, - commands::otel::result_callback&& handler) { - std::shared_ptr to_use; - try { - to_use = otl_check_result_builder::create( - processed_cmd, - std::static_pointer_cast(conv_config), - command_id, *macros.host_ptr, macros.service_ptr, - std::chrono::system_clock::now() + std::chrono::seconds(timeout), - std::move(handler), _logger); - } catch (const std::exception& e) { - SPDLOG_LOGGER_ERROR(_logger, "fail to create converter for {} : {}", - processed_cmd, e.what()); - throw; - }; - - bool res_available = to_use->sync_build_result_from_metrics(_fifo, res); - - if (res_available) { - SPDLOG_LOGGER_TRACE(_logger, "data available for command {} converter:{}", - command_id, *to_use); - return true; - } - - SPDLOG_LOGGER_TRACE( - _logger, "data unavailable for command {} timeout: {} converter:{}", - command_id, timeout, *to_use); - - // metrics not yet available = wait for data or until timeout - std::lock_guard l(_protect); - _waiting.insert(to_use); - - return false; + com::centreon::engine::commands::otel::otl_check_result_builder_base> +open_telemetry::create_check_result_builder(const std::string& cmdline) { + return otl_check_result_builder::create(cmdline, _logger); } /** @@ -385,7 +316,7 @@ bool open_telemetry::check( * * @param metrics collector request */ -void open_telemetry::_on_metric(const metric_request_ptr& metrics) { +void open_telemetry::on_metric(const metric_request_ptr& metrics) { std::vector unknown; { std::lock_guard l(_protect); @@ -395,13 +326,15 @@ void open_telemetry::_on_metric(const metric_request_ptr& metrics) { unknown.push_back(data_pt); }); } else { - waiting_converter::nth_index<0>::type& host_serv_index = - _waiting.get<0>(); - std::vector> to_notify; + std::shared_ptr, metric_to_datapoints>> + known_data_pt = std::make_shared< + absl::flat_hash_map, + metric_to_datapoints>>(); auto last_success = _extractors.begin(); otl_data_point::extract_data_points( - metrics, [this, &unknown, &last_success, &host_serv_index, - &to_notify](const otl_data_point& data_pt) { + metrics, [this, &unknown, &last_success, + known_data_pt](const otl_data_point& data_pt) { bool data_point_known = false; // we try all extractors and we begin with the last which has // achieved to extract host @@ -410,17 +343,10 @@ void open_telemetry::_on_metric(const metric_request_ptr& metrics) { last_success->second->extract_host_serv_metric(data_pt); if (!hostservmetric.host.empty()) { // match - _fifo.add_data_point(hostservmetric.host, - hostservmetric.service, - hostservmetric.metric, data_pt); - - // converters waiting this metric? - auto waiting = host_serv_index.equal_range( - host_serv{hostservmetric.host, hostservmetric.service}); - while (waiting.first != waiting.second) { - to_notify.push_back(*waiting.first); - waiting.first = host_serv_index.erase(waiting.first); - } + (*known_data_pt)[std::make_pair(hostservmetric.host, + hostservmetric.service)] + [data_pt.get_metric().name()] + .insert(data_pt); data_point_known = true; break; } @@ -435,16 +361,29 @@ void open_telemetry::_on_metric(const metric_request_ptr& metrics) { data_pt); // unknown metric => forward to broker } }); - SPDLOG_LOGGER_TRACE(_logger, "fifos:{}", _fifo); - // we wait that all request datas have been computed to give us more - // chance of converter success - for (auto to_callback : to_notify) { - if (!to_callback->async_build_result_from_metrics( - _fifo)) { // not enough data => repush in _waiting - _waiting.insert(to_callback); - } - } - SPDLOG_LOGGER_TRACE(_logger, "fifos:{}", _fifo); + + // we post all check results in the main thread + auto fn = std::packaged_task( + [known_data_pt, metrics, logger = _logger]() { + // for each host or service, we generate a result + for (const auto& host_serv_data : *known_data_pt) { + // get connector for this service + std::shared_ptr conn = + commands::otel_connector::get_otel_connector_from_host_serv( + host_serv_data.first.first, host_serv_data.first.second); + if (!conn) { + SPDLOG_LOGGER_ERROR( + logger, "no opentelemetry connector found for {}:{}", + host_serv_data.first.first, host_serv_data.first.second); + } else { + conn->process_data_pts(host_serv_data.first.first, + host_serv_data.first.second, + host_serv_data.second); + } + } + return OK; + }); + command_manager::instance().enqueue(std::move(fn)); } } if (!unknown.empty()) { @@ -453,51 +392,6 @@ void open_telemetry::_on_metric(const metric_request_ptr& metrics) { } } -/** - * @brief the second timer is used to handle converter timeouts - * - */ -void open_telemetry::_start_second_timer() { - std::lock_guard l(_protect); - _second_timer.expires_from_now(std::chrono::seconds(1)); - _second_timer.async_wait( - [me = shared_from_this()](const boost::system::error_code& err) { - if (!err) { - me->_second_timer_handler(); - } - }); -} - -/** - * @brief notify all timeouts - * - */ -void open_telemetry::_second_timer_handler() { - std::vector> to_notify; - { - std::lock_guard l(_protect); - std::chrono::system_clock::time_point now = - std::chrono::system_clock::now(); - waiting_converter::nth_index<1>::type& expiry_index = _waiting.get<1>(); - while (!_waiting.empty()) { - auto oldest = expiry_index.begin(); - if ((*oldest)->get_time_out() > now) { - break; - } - to_notify.push_back(*oldest); - expiry_index.erase(oldest); - } - } - - // notify all timeout - for (std::shared_ptr to_not : to_notify) { - SPDLOG_LOGGER_DEBUG(_logger, "time out: {}", *to_not); - to_not->async_time_out(); - } - - _start_second_timer(); -} - /** * @brief unknown metrics are directly forwarded to broker * diff --git a/engine/modules/opentelemetry/src/otl_check_result_builder.cc b/engine/modules/opentelemetry/src/otl_check_result_builder.cc index 517374773a5..3660d4fe73c 100644 --- a/engine/modules/opentelemetry/src/otl_check_result_builder.cc +++ b/engine/modules/opentelemetry/src/otl_check_result_builder.cc @@ -16,10 +16,14 @@ * For more information : contact@centreon.com */ +#include "com/centreon/engine/checks/checker.hh" #include "com/centreon/engine/globals.hh" +#include "com/centreon/engine/host.hh" +#include "com/centreon/engine/notifier.hh" +#include "com/centreon/engine/service.hh" + #include "com/centreon/exceptions/msg_fmt.hh" -#include "data_point_fifo_container.hh" #include "otl_check_result_builder.hh" #include "centreon_agent/agent_check_result_builder.hh" @@ -35,157 +39,22 @@ using namespace com::centreon::engine::modules::opentelemetry; * object * * @param cmd_line - * @param command_id - * @param host - * @param service - * @param timeout - * @param handler called when mandatory metrics will be available * @param logger */ otl_check_result_builder::otl_check_result_builder( const std::string& cmd_line, - uint64_t command_id, - const host& host, - const service* service, - std::chrono::system_clock::time_point timeout, - commands::otel::result_callback&& handler, const std::shared_ptr& logger) - : _cmd_line(cmd_line), - _command_id(command_id), - _host_serv{host.name(), service ? service->description() : ""}, - _timeout(timeout), - _callback(handler), - _logger(logger) {} - -/** - * @brief try to build a check result - * - * @param data_pts - * @param res - * @return true all mandatory metrics are available and a check_result is built - * @return false - */ -bool otl_check_result_builder::sync_build_result_from_metrics( - data_point_fifo_container& data_pts, - commands::result& res) { - std::lock_guard l(data_pts); - auto& fifos = data_pts.get_fifos(_host_serv.first, _host_serv.second); - if (!fifos.empty() && _build_result_from_metrics(fifos, res)) { - return true; - } - // no data available - return false; -} - -/** - * @brief called when data is received from otel - * clients - * - * @param data_pts - * @return true otl_check_result_builder has managed to create check result - * @return false - */ -bool otl_check_result_builder::async_build_result_from_metrics( - data_point_fifo_container& data_pts) { - commands::result res; - bool success = false; - { - std::lock_guard l(data_pts); - auto& fifos = data_pts.get_fifos(_host_serv.first, _host_serv.second); - success = !fifos.empty() && _build_result_from_metrics(fifos, res); - } - if (success) { - _callback(res); - } - return success; -} - -/** - * @brief called when no data is received before - * _timeout - * - */ -void otl_check_result_builder::async_time_out() { - commands::result res; - res.exit_status = process::timeout; - res.command_id = _command_id; - _callback(res); -} + : _cmd_line(cmd_line), _logger(logger) {} /** * @brief create a otl_converter_config from a command line - * first field identify type of config - * Example: - * @code {.c++} - * std::shared_ptr converter = - * otl_check_result_builder::create("--processor=nagios_telegraf - * --fifo_depth=5", conf, 5, *host, serv, timeout_point, [](const - * commads::result &res){}, _logger); - * @endcode * * @param cmd_line - * @param conf bean configuration object created by - * create_check_result_builder_config - * @param command_id - * @param host - * @param service - * @param timeout - * @param handler handler that will be called once we have all metrics mandatory - * to create a check_result * @return std::shared_ptr */ std::shared_ptr otl_check_result_builder::create( const std::string& cmd_line, - const std::shared_ptr& conf, - uint64_t command_id, - const host& host, - const service* service, - std::chrono::system_clock::time_point timeout, - commands::otel::result_callback&& handler, const std::shared_ptr& logger) { - switch (conf->get_type()) { - case check_result_builder_config::converter_type:: - nagios_check_result_builder: - return std::make_shared( - cmd_line, command_id, host, service, timeout, std::move(handler), - logger); - case check_result_builder_config::converter_type:: - centreon_agent_check_result_builder: - return std::make_shared( - cmd_line, command_id, host, service, timeout, std::move(handler), - logger); - default: - SPDLOG_LOGGER_ERROR(logger, "unknown converter type:{}", cmd_line); - throw exceptions::msg_fmt("unknown converter type:{}", cmd_line); - } -} - -/** - * @brief debug infos - * - * @param output string to log - */ -void otl_check_result_builder::dump(std::string& output) const { - output = fmt::format( - "host:{}, service:{}, command_id={}, timeout:{} cmdline: \"{}\"", - _host_serv.first, _host_serv.second, _command_id, _timeout, _cmd_line); -} - -/** - * @brief create a otl_converter_config from a command line - * --processor flag identifies type of converter - * Example: - * @code {.c++} - * std::shared_ptr converter = - * otl_converter::create_check_result_builder_config("--processor=nagios_telegraf - * --fifo_depth=5"); - * - * @param cmd_line - * @return std::shared_ptr - */ -std::shared_ptr -otl_check_result_builder::create_check_result_builder_config( - const std::string& cmd_line) { static initialized_data_class desc( [](po::options_description& desc) { desc.add_options()("processor", po::value(), @@ -204,21 +73,75 @@ otl_check_result_builder::create_check_result_builder_config( } std::string extractor_type = vm["processor"].as(); if (extractor_type == "nagios_telegraf") { - return std::make_shared( - check_result_builder_config::converter_type:: - nagios_check_result_builder); + return std::make_shared(cmd_line, + logger); } else if (extractor_type == "centreon_agent") { - return std::make_shared( - check_result_builder_config::converter_type:: - centreon_agent_check_result_builder); + return std::make_shared( + cmd_line, logger); } else { throw exceptions::msg_fmt("unknown processor in {}", cmd_line); } } catch (const std::exception& e) { - SPDLOG_LOGGER_ERROR( - config_logger, - "fail to get opentelemetry converter configuration from {}: {}", - cmd_line, e.what()); + SPDLOG_LOGGER_ERROR(config_logger, + "fail to get opentelemetry check_result_builder " + "configuration from {}: {}", + cmd_line, e.what()); throw; } } + +/** + * @brief convert opentelemetry datas in check_result and post it to + * checks::checker::instance() Caution, this function must be called from engine + * main thread + * + * @param host + * @param serv empty if result of host check + * @param data_pts opentelemetry data points + */ +void otl_check_result_builder::process_data_pts( + const std::string_view& hst, + const std::string_view& serv, + const metric_to_datapoints& data_pts) { + check_source notifier_type = check_source::service_check; + notifier* host_or_serv = nullptr; + + if (serv.empty()) { + notifier_type = check_source::host_check; + auto found = host::hosts.find(hst); + if (found == host::hosts.end()) { + SPDLOG_LOGGER_ERROR(_logger, "unknow host: {}", hst); + return; + } + host_or_serv = found->second.get(); + } else { + auto found = service::services.find(std::make_pair(hst, serv)); + if (found == service::services.end()) { + SPDLOG_LOGGER_ERROR(_logger, "unknow service {} for host", serv, hst); + return; + } + host_or_serv = found->second.get(); + } + timeval zero = {0, 0}; + std::shared_ptr res = std::make_shared( + notifier_type, host_or_serv, checkable::check_type::check_passive, + CHECK_OPTION_NONE, false, 0, zero, zero, false, true, 0, ""); + if (build_result_from_metrics(data_pts, *res)) { + checks::checker::instance().add_check_result_to_reap(res); + } else { + SPDLOG_LOGGER_ERROR( + _logger, + "fail to convert opentelemetry datas in centreon check_result for host " + "{}, serv {}", + hst, serv); + } +} + +/** + * @brief debug infos + * + * @param output string to log + */ +void otl_check_result_builder::dump(std::string& output) const { + output = _cmd_line; +} diff --git a/engine/modules/opentelemetry/src/otl_config.cc b/engine/modules/opentelemetry/src/otl_config.cc index f0c62dda374..386615aaf19 100644 --- a/engine/modules/opentelemetry/src/otl_config.cc +++ b/engine/modules/opentelemetry/src/otl_config.cc @@ -47,16 +47,6 @@ static constexpr std::string_view _grpc_config_schema(R"( "description": "true if we log otl grpc object to json format", "type": "boolean" }, - "second_fifo_expiry": { - "description:": "lifetime of data points in fifos", - "type": "integer", - "min": 30 - }, - "max_fifo_size": { - "description:": "max number of data points in fifos", - "type": "integer", - "min": 1 - }, "otel_server": { "description": "otel grpc config", "type": "object" @@ -98,8 +88,6 @@ otl_config::otl_config(const std::string_view& file_path, file_content.validate(validator); _max_length_grpc_log = file_content.get_unsigned("max_length_grpc_log", 400); _json_grpc_log = file_content.get_bool("grpc_json_log", false); - _second_fifo_expiry = file_content.get_unsigned("second_fifo_expiry", 600); - _max_fifo_size = file_content.get_unsigned("max_fifo_size", 5); if (file_content.has_member("otel_server")) { try { _grpc_conf = @@ -158,9 +146,7 @@ bool otl_config::operator==(const otl_config& right) const { } bool ret = *_grpc_conf == *right._grpc_conf && _max_length_grpc_log == right._max_length_grpc_log && - _json_grpc_log == right._json_grpc_log && - _second_fifo_expiry == right._second_fifo_expiry && - _max_fifo_size == right._max_fifo_size; + _json_grpc_log == right._json_grpc_log; if (!ret) { return false; diff --git a/engine/modules/opentelemetry/src/otl_data_point.cc b/engine/modules/opentelemetry/src/otl_data_point.cc index 7e5273725f1..2cf7374cd3b 100644 --- a/engine/modules/opentelemetry/src/otl_data_point.cc +++ b/engine/modules/opentelemetry/src/otl_data_point.cc @@ -43,6 +43,7 @@ otl_data_point::otl_data_point( _data_point(data_pt), _data_point_attributes(data_pt.attributes()), _exemplars(data_pt.exemplars()), + _start_nano_timestamp(data_pt.start_time_unix_nano()), _nano_timestamp(data_pt.time_unix_nano()), _type(data_point_type::number) { _value = data_pt.as_double() ? data_pt.as_double() : data_pt.as_int(); @@ -61,6 +62,7 @@ otl_data_point::otl_data_point( _data_point(data_pt), _data_point_attributes(data_pt.attributes()), _exemplars(data_pt.exemplars()), + _start_nano_timestamp(data_pt.start_time_unix_nano()), _nano_timestamp(data_pt.time_unix_nano()), _type(data_point_type::histogram) { _value = data_pt.count(); @@ -80,6 +82,7 @@ otl_data_point::otl_data_point( _data_point(data_pt), _data_point_attributes(data_pt.attributes()), _exemplars(data_pt.exemplars()), + _start_nano_timestamp(data_pt.start_time_unix_nano()), _nano_timestamp(data_pt.time_unix_nano()), _type(data_point_type::exponential_histogram) { _value = data_pt.count(); @@ -98,6 +101,7 @@ otl_data_point::otl_data_point( _data_point(data_pt), _data_point_attributes(data_pt.attributes()), _exemplars(_empty_exemplars), + _start_nano_timestamp(data_pt.start_time_unix_nano()), _nano_timestamp(data_pt.time_unix_nano()), _type(data_point_type::summary) { _value = data_pt.count(); diff --git a/engine/modules/opentelemetry/src/telegraf/nagios_check_result_builder.cc b/engine/modules/opentelemetry/src/telegraf/nagios_check_result_builder.cc index e8515b2e217..5bf775b9f3b 100644 --- a/engine/modules/opentelemetry/src/telegraf/nagios_check_result_builder.cc +++ b/engine/modules/opentelemetry/src/telegraf/nagios_check_result_builder.cc @@ -16,7 +16,6 @@ * For more information : contact@centreon.com */ -#include "data_point_fifo_container.hh" #include "otl_check_result_builder.hh" #include "telegraf/nagios_check_result_builder.hh" @@ -153,47 +152,58 @@ static std::string_view get_nagios_telegraf_suffix( } /** - * @brief + * @brief fill a check_result from otel datas * - * @param fifos fifos indexed by metric_name such as check_icmp_critical_gt, - * check_icmp_state - * @return com::centreon::engine::commands::result + * @param data_pts + * @param res + * @return true if res is filled + * @return false */ -bool nagios_check_result_builder::_build_result_from_metrics( - metric_name_to_fifo& fifos, - commands::result& res) { +bool nagios_check_result_builder::build_result_from_metrics( + const metric_to_datapoints& data_pts, + check_result& res) { // first we search last state timestamp uint64_t last_time = 0; - for (auto& metric_to_fifo : fifos) { - if (get_nagios_telegraf_suffix(metric_to_fifo.first) == "state") { - auto& fifo = metric_to_fifo.second.get_fifo(); - if (!fifo.empty()) { - const auto& last_sample = *fifo.rbegin(); - last_time = last_sample.get_nano_timestamp(); - res.exit_code = last_sample.get_value(); - metric_to_fifo.second.clean_oldest(last_time); + for (const auto& metric_to_data_pts : data_pts) { + if (get_nagios_telegraf_suffix(metric_to_data_pts.first) == "state") { + const auto& last_sample = metric_to_data_pts.second.rbegin(); + last_time = last_sample->get_nano_timestamp(); + res.set_return_code(last_sample->get_value()); + + res.set_finish_time( + {.tv_sec = static_cast(last_time / 1000000000), + .tv_usec = static_cast((last_time / 1000) % 1000000)}); + + if (last_sample->get_start_nano_timestamp() > 0) { + res.set_start_time( + {.tv_sec = static_cast( + last_sample->get_start_nano_timestamp() / 1000000000), + .tv_usec = static_cast( + (last_sample->get_start_nano_timestamp() / 1000) % 1000000)}); + } else { + res.set_start_time(res.get_finish_time()); } break; } } + if (!last_time) { return false; } - res.command_id = get_command_id(); - res.exit_status = process::normal; - res.end_time = res.start_time = last_time / 1000000000; // construct perfdata list by perfdata name std::map perfs; - for (auto& metric_to_fifo : fifos) { - std::string_view suffix = get_nagios_telegraf_suffix(metric_to_fifo.first); - const data_point_fifo::container& data_points = - metric_to_fifo.second.get_fifo(); + for (const auto& metric_to_data_pts : data_pts) { + std::string_view suffix = + get_nagios_telegraf_suffix(metric_to_data_pts.first); + if (suffix == "state") { + continue; + } // we scan all data points for that metric (example check_icmp_critical_gt // can contain a data point for pl and another for rta) - auto data_pt_search = data_points.equal_range(last_time); + auto data_pt_search = metric_to_data_pts.second.equal_range(last_time); for (; data_pt_search.first != data_pt_search.second; ++data_pt_search.first) { const auto attributes = data_pt_search.first->get_data_point_attributes(); @@ -218,49 +228,53 @@ bool nagios_check_result_builder::_build_result_from_metrics( _logger); } } - metric_to_fifo.second.clean_oldest(last_time); } - data_point_fifo_container::clean_empty_fifos(fifos); + std::string output; // then format all in a string with format: // 'label'=value[UOM];[warn];[crit];[min];[max] - if (res.exit_code >= 0 && res.exit_code < 4) { - res.output = state_str[res.exit_code]; + if (res.get_return_code() >= 0 && res.get_return_code() < 4) { + output = state_str[res.get_return_code()]; } - res.output.push_back('|'); + output.push_back('|'); for (const auto& perf : perfs) { if (perf.second.val) { - absl::StrAppend(&res.output, perf.first, "=", *perf.second.val, + absl::StrAppend(&output, perf.first, "=", *perf.second.val, perf.second.unit, ";"); if (perf.second.warning_le) { - absl::StrAppend(&res.output, "@", *perf.second.warning_le, ":", + absl::StrAppend(&output, "@", *perf.second.warning_le, ":", *perf.second.warning_ge); } else if (perf.second.warning_lt) { - absl::StrAppend(&res.output, *perf.second.warning_lt, ":", + absl::StrAppend(&output, *perf.second.warning_lt, ":", *perf.second.warning_gt); } - res.output.push_back(';'); + output.push_back(';'); if (perf.second.critical_le) { - absl::StrAppend(&res.output, "@", *perf.second.critical_le, ":", + absl::StrAppend(&output, "@", *perf.second.critical_le, ":", *perf.second.critical_ge); } else if (perf.second.critical_lt) { - absl::StrAppend(&res.output, *perf.second.critical_lt, ":", + absl::StrAppend(&output, *perf.second.critical_lt, ":", *perf.second.critical_gt); } - res.output.push_back(';'); + output.push_back(';'); if (perf.second.min) { - absl::StrAppend(&res.output, *perf.second.min); + absl::StrAppend(&output, *perf.second.min); } - res.output.push_back(';'); + output.push_back(';'); if (perf.second.max) { - absl::StrAppend(&res.output, *perf.second.max); + absl::StrAppend(&output, *perf.second.max); } - res.output.push_back(' '); + output.push_back(' '); } } // remove last space - res.output.pop_back(); + if (*output.rbegin() == ' ') { + output.pop_back(); + } + + res.set_output(output); + return true; } diff --git a/engine/src/check_result.cc b/engine/src/check_result.cc index d94e2f4fe2e..a319290246f 100644 --- a/engine/src/check_result.cc +++ b/engine/src/check_result.cc @@ -27,7 +27,6 @@ using namespace com::centreon::engine; check_result::check_result() : _object_check_type{check_source::service_check}, - _command_id(0), _notifier{nullptr}, _check_type(checkable::check_type::check_passive), _check_options{0}, @@ -52,7 +51,6 @@ check_result::check_result(enum check_source object_check_type, int return_code, std::string output) : _object_check_type{object_check_type}, - _command_id(0), _notifier{notifier}, _check_type(check_type), _check_options{check_options}, @@ -124,8 +122,7 @@ void check_result::set_check_options(unsigned check_options) { namespace com::centreon::engine { std::ostream& operator<<(std::ostream& stream, const check_result& res) { - stream << "command_id=" << res.get_command_id() - << " timeout=" << res.get_early_timeout() + stream << " timeout=" << res.get_early_timeout() << " ok=" << res.get_exited_ok() << " ret_code=" << res.get_return_code() << " output:" << res.get_output(); diff --git a/engine/src/checks/checker.cc b/engine/src/checks/checker.cc index 6a86eb69461..235d9ff63e4 100644 --- a/engine/src/checks/checker.cc +++ b/engine/src/checks/checker.cc @@ -393,7 +393,6 @@ void checker::finished(commands::result const& res) noexcept { result->set_exited_ok(res.exit_status == process::normal || res.exit_status == process::timeout); result->set_output(res.output); - result->set_command_id(res.command_id); // Queue check result. lock.lock(); diff --git a/engine/src/commands/otel_connector.cc b/engine/src/commands/otel_connector.cc index e311d5192f6..6b8433e4d15 100644 --- a/engine/src/commands/otel_connector.cc +++ b/engine/src/commands/otel_connector.cc @@ -37,15 +37,17 @@ absl::flat_hash_map> * @param cmd_line * @param listener */ -void otel_connector::create(const std::string& connector_name, - const std::string& cmd_line, - commands::command_listener* listener) { +std::shared_ptr otel_connector::create( + const std::string& connector_name, + const std::string& cmd_line, + commands::command_listener* listener) { std::shared_ptr cmd( std::make_shared(connector_name, cmd_line, listener)); auto iter_res = _commands.emplace(connector_name, cmd); if (!iter_res.second) { iter_res.first->second = cmd; } + return cmd; } /** @@ -90,6 +92,26 @@ std::shared_ptr otel_connector::get_otel_connector( : std::shared_ptr(); } +/** + * @brief get otel command that is used by host serv + * Caution: This function must be called from engine main thread + * + * @param host + * @param serv + * @return std::shared_ptr null if not found + */ +std::shared_ptr +otel_connector::get_otel_connector_from_host_serv( + const std::string_view& host, + const std::string_view& serv) { + for (const auto& name_to_conn : _commands) { + if (name_to_conn.second->_host_serv_list->contains(host, serv)) { + return name_to_conn.second; + } + } + return {}; +} + /** * @brief erase all otel commands * @@ -155,62 +177,8 @@ uint64_t otel_connector::run(const std::string& processed_cmd, uint32_t timeout, const check_result::pointer& to_push_to_checker, const void* caller) { - std::shared_ptr otel = - otel::open_telemetry_base::instance(); - - if (!otel) { - SPDLOG_LOGGER_ERROR(_logger, - "open telemetry module not loaded for connector: {}", - get_name()); - throw exceptions::msg_fmt( - "open telemetry module not loaded for connector: {}", get_name()); - } - - uint64_t command_id(get_uniq_id()); - - if (!gest_call_interval(command_id, to_push_to_checker, caller)) { - return command_id; - } - - if (!_conv_conf) { - SPDLOG_LOGGER_ERROR( - _logger, "{} unable to do a check without a converter configuration", - get_name()); - throw exceptions::msg_fmt( - "{} unable to do a check without a converter configuration", - get_name()); - } - SPDLOG_LOGGER_TRACE( - _logger, - "otel_connector::async_run: connector='{}', command_id={}, " - "cmd='{}', timeout={}", - _name, command_id, processed_cmd, timeout); - - result res; - bool res_available = otel->check( - processed_cmd, _conv_conf, command_id, macros, timeout, res, - [me = shared_from_this(), command_id](const result& async_res) { - SPDLOG_LOGGER_TRACE( - me->_logger, "otel_connector async_run callback: connector='{}' {}", - me->_name, async_res); - me->update_result_cache(command_id, async_res); - if (me->_listener) { - (me->_listener->finished)(async_res); - } - }); - - if (res_available) { - SPDLOG_LOGGER_TRACE(_logger, - "otel_connector data available : connector='{}', " - "cmd='{}', {}", - _name, processed_cmd, res); - update_result_cache(command_id, res); - if (_listener) { - (_listener->finished)(res); - } - } - - return command_id; + SPDLOG_LOGGER_ERROR(_logger, "open telemetry services must be passive"); + throw exceptions::msg_fmt("open telemetry services must be passive"); } /** @@ -227,41 +195,25 @@ void otel_connector::run(const std::string& processed_cmd, nagios_macros& macros, uint32_t timeout, result& res) { - std::shared_ptr otel = - otel::open_telemetry_base::instance(); - if (!otel) { - SPDLOG_LOGGER_ERROR(_logger, - "open telemetry module not loaded for connector: {}", - get_name()); - throw exceptions::msg_fmt( - "open telemetry module not loaded for connector: {}", get_name()); - } - - uint64_t command_id(get_uniq_id()); - - SPDLOG_LOGGER_TRACE(_logger, - "otel_connector::sync_run: connector='{}', cmd='{}', " - "command_id={}, timeout={}", - _name, processed_cmd, command_id, timeout); - - std::condition_variable cv; - std::mutex cv_m; - - bool res_available = - otel->check(processed_cmd, _conv_conf, command_id, macros, timeout, res, - [&res, &cv](const result& async_res) { - res = async_res; - cv.notify_one(); - }); + SPDLOG_LOGGER_ERROR(_logger, "open telemetry services must be passive"); + throw exceptions::msg_fmt("open telemetry services must be passive"); +} - // no otl_data_point available => wait util available or timeout - if (!res_available) { - std::unique_lock l(cv_m); - cv.wait(l); - } - SPDLOG_LOGGER_TRACE( - _logger, "otel_connector::end sync_run: connector='{}', cmd='{}', {}", - _name, processed_cmd, res); +/** + * @brief convert opentelemetry datas in check_result and post it to + * checks::checker::instance() Caution, this function must be called from engine + * main thread + * + * @param host + * @param serv empty if result of host check + * @param data_pts opentelemetry data points + */ +void otel_connector::process_data_pts( + const std::string_view& host, + const std::string_view& serv, + const com::centreon::engine::modules::opentelemetry::metric_to_datapoints& + data_pts) { + _check_result_builder->process_data_pts(host, serv, data_pts); } /** @@ -288,12 +240,12 @@ void otel_connector::init() { get_name(), get_command_line()); } try { - if (!_conv_conf) { + if (!_check_result_builder) { std::shared_ptr otel = otel::open_telemetry_base::instance(); if (otel) { - _conv_conf = - otel->create_check_result_builder_config(get_command_line()); + _check_result_builder = + otel->create_check_result_builder(get_command_line()); } } } catch (const std::exception& e) { diff --git a/engine/src/commands/otel_interface.cc b/engine/src/commands/otel_interface.cc index 19d5559b1fb..b3e3fd67545 100644 --- a/engine/src/commands/otel_interface.cc +++ b/engine/src/commands/otel_interface.cc @@ -45,21 +45,3 @@ void host_serv_list::remove(const std::string& host, } } } - -/** - * @brief test if a host serv pair is contained in list - * - * @param host - * @param service_description - * @return true found - * @return false not found - */ -bool host_serv_list::contains(const std::string& host, - const std::string& service_description) const { - absl::ReaderMutexLock l(&_data_m); - auto host_search = _data.find(host); - if (host_search != _data.end()) { - return host_search->second.contains(service_description); - } - return false; -} diff --git a/engine/tests/macros/macro_service.cc b/engine/tests/macros/macro_service.cc index 13befa25cf2..e1eedb81e40 100644 --- a/engine/tests/macros/macro_service.cc +++ b/engine/tests/macros/macro_service.cc @@ -47,6 +47,8 @@ using namespace com::centreon; using namespace com::centreon::engine; +using namespace std::literals; + class MacroService : public TestEngine { public: void SetUp() override { @@ -113,8 +115,8 @@ TEST_F(MacroService, ServiceMacro) { std::string out; host::hosts["test_host"]->set_current_state(host::state_up); host::hosts["test_host"]->set_has_been_checked(true); - service::services[std::make_pair("test_host", "test_svc")]->set_plugin_output( - "foo bar!"); + service::services[std::make_pair("test_host"sv, "test_svc"sv)] + ->set_plugin_output("foo bar!"); process_macros_r(mac, "$SERVICEOUTPUT:test_host:test_svc$", out, 1); ASSERT_EQ(out, "foo bar!"); } @@ -391,7 +393,7 @@ TEST_F(MacroService, ServicePerfData) { nagios_macros* mac(get_global_macros()); host::hosts["test_host"]->set_current_state(host::state_up); host::hosts["test_host"]->set_has_been_checked(true); - service::services[std::make_pair("test_host", "test_svc")]->set_perf_data( + service::services[std::make_pair("test_host"sv, "test_svc"sv)]->set_perf_data( "foo"); process_macros_r(mac, "$SERVICEPERFDATA:test_host:test_svc$", out, 0); ASSERT_EQ(out, "foo"); @@ -441,7 +443,7 @@ TEST_F(MacroService, ServiceExecutionTime) { nagios_macros* mac(get_global_macros()); host::hosts["test_host"]->set_current_state(host::state_up); host::hosts["test_host"]->set_has_been_checked(true); - service::services[std::make_pair("test_host", "test_svc")] + service::services[std::make_pair("test_host"sv, "test_svc"sv)] ->set_execution_time(20.00); process_macros_r(mac, "$SERVICEEXECUTIONTIME:test_host:test_svc$", out, 1); ASSERT_EQ(out, "20.000"); @@ -491,7 +493,7 @@ TEST_F(MacroService, ServiceLatency) { nagios_macros* mac(get_global_macros()); host::hosts["test_host"]->set_current_state(host::state_up); host::hosts["test_host"]->set_has_been_checked(true); - service::services[std::make_pair("test_host", "test_svc")]->set_latency( + service::services[std::make_pair("test_host"sv, "test_svc"sv)]->set_latency( 20.00); process_macros_r(mac, "$SERVICELATENCY:test_host:test_svc$", out, 1); ASSERT_EQ(out, "20.000"); @@ -541,7 +543,7 @@ TEST_F(MacroService, ServiceDuration) { nagios_macros* mac(get_global_macros()); host::hosts["test_host"]->set_current_state(host::state_up); host::hosts["test_host"]->set_has_been_checked(true); - service::services[std::make_pair("test_host", "test_svc")]->set_latency( + service::services[std::make_pair("test_host"sv, "test_svc"sv)]->set_latency( 20.00); process_macros_r(mac, "$SERVICEDURATION:test_host:test_svc$", out, 1); ASSERT_EQ(out, "5787d 0h 53m 20s"); @@ -590,7 +592,7 @@ TEST_F(MacroService, ServiceDurationSec) { nagios_macros* mac(get_global_macros()); host::hosts["test_host"]->set_current_state(host::state_up); host::hosts["test_host"]->set_has_been_checked(true); - service::services[std::make_pair("test_host", "test_svc")]->set_latency( + service::services[std::make_pair("test_host"sv, "test_svc"sv)]->set_latency( 20.00); process_macros_r(mac, "$SERVICEDURATIONSEC:test_host:test_svc$", out, 1); ASSERT_EQ(out, "500000000"); @@ -815,8 +817,8 @@ TEST_F(MacroService, LastServiceOK) { std::string out; nagios_macros* mac(get_global_macros()); - service::services[std::make_pair("test_host", "test_svc")]->set_last_time_ok( - 20); + service::services[std::make_pair("test_host"sv, "test_svc"sv)] + ->set_last_time_ok(20); process_macros_r(mac, "$LASTSERVICEOK:test_host:test_svc$", out, 1); ASSERT_EQ(out, "20"); } @@ -852,7 +854,7 @@ TEST_F(MacroService, LastServiceWarning) { std::string out; nagios_macros* mac(get_global_macros()); - service::services[std::make_pair("test_host", "test_svc")] + service::services[std::make_pair("test_host"sv, "test_svc"sv)] ->set_last_time_warning(30); process_macros_r(mac, "$LASTSERVICEWARNING:test_host:test_svc$", out, 1); ASSERT_EQ(out, "30"); @@ -889,7 +891,7 @@ TEST_F(MacroService, LastServiceUnknown) { std::string out; nagios_macros* mac(get_global_macros()); - service::services[std::make_pair("test_host", "test_svc")] + service::services[std::make_pair("test_host"sv, "test_svc"sv)] ->set_last_time_unknown(40); process_macros_r(mac, "$LASTSERVICEUNKNOWN:test_host:test_svc$", out, 1); ASSERT_EQ(out, "40"); @@ -926,7 +928,7 @@ TEST_F(MacroService, LastServiceCritical) { std::string out; nagios_macros* mac(get_global_macros()); - service::services[std::make_pair("test_host", "test_svc")] + service::services[std::make_pair("test_host"sv, "test_svc"sv)] ->set_last_time_critical(50); process_macros_r(mac, "$LASTSERVICECRITICAL:test_host:test_svc$", out, 1); ASSERT_EQ(out, "50"); @@ -963,7 +965,7 @@ TEST_F(MacroService, ServiceCheckCommand) { std::string out; nagios_macros* mac(get_global_macros()); - service::services[std::make_pair("test_host", "test_svc")] + service::services[std::make_pair("test_host"sv, "test_svc"sv)] ->set_last_time_critical(50); process_macros_r(mac, "$SERVICECHECKCOMMAND:test_host:test_svc$", out, 1); ASSERT_EQ(out, "cmd"); @@ -1000,7 +1002,7 @@ TEST_F(MacroService, ServiceDisplayName) { std::string out; nagios_macros* mac(get_global_macros()); - service::services[std::make_pair("test_host", "test_svc")] + service::services[std::make_pair("test_host"sv, "test_svc"sv)] ->set_last_time_critical(50); process_macros_r(mac, "$SERVICEDISPLAYNAME:test_host:test_svc$", out, 1); ASSERT_EQ(out, "test_svc"); @@ -1474,7 +1476,7 @@ TEST_F(MacroService, LongServiceOutput) { std::string out; nagios_macros* mac(get_global_macros()); - service::services[std::make_pair("test_host", "test_svc")] + service::services[std::make_pair("test_host"sv, "test_svc"sv)] ->set_long_plugin_output("test_long_output"); process_macros_r(mac, "$LONGSERVICEOUTPUT:test_host:test_svc$", out, 1); ASSERT_EQ(out, "test_long_output"); @@ -1512,7 +1514,7 @@ TEST_F(MacroService, ServiceNotificationID) { std::string out; nagios_macros* mac(get_global_macros()); - service::services[std::make_pair("test_host", "test_svc")] + service::services[std::make_pair("test_host"sv, "test_svc"sv)] ->set_long_plugin_output("test_long_output"); process_macros_r(mac, "$SERVICENOTIFICATIONID:test_host:test_svc$", out, 1); ASSERT_EQ(out, "0"); @@ -1550,7 +1552,7 @@ TEST_F(MacroService, ServiceEventID) { std::string out; nagios_macros* mac(get_global_macros()); - service::services[std::make_pair("test_host", "test_svc")] + service::services[std::make_pair("test_host"sv, "test_svc"sv)] ->set_long_plugin_output("test_long_output"); process_macros_r(mac, "$SERVICEEVENTID:test_host:test_svc$", out, 1); ASSERT_EQ(out, "0"); @@ -1588,7 +1590,7 @@ TEST_F(MacroService, LastServiceEventID) { std::string out; nagios_macros* mac(get_global_macros()); - service::services[std::make_pair("test_host", "test_svc")] + service::services[std::make_pair("test_host"sv, "test_svc"sv)] ->set_long_plugin_output("test_long_output"); process_macros_r(mac, "$LASTSERVICEEVENTID:test_host:test_svc$", out, 1); ASSERT_EQ(out, "0"); @@ -1631,7 +1633,7 @@ TEST_F(MacroService, ServiceGroupNames) { std::string out; nagios_macros* mac(get_global_macros()); - service::services[std::make_pair("test_host", "test")] + service::services[std::make_pair("test_host"sv, "test"sv)] ->set_long_plugin_output("test_long_output"); process_macros_r(mac, "$SERVICEGROUPNAMES:test_host:test$", out, 1); ASSERT_EQ(out, "test_group"); @@ -1669,7 +1671,7 @@ TEST_F(MacroService, MaxServiceAttempts) { std::string out; nagios_macros* mac(get_global_macros()); - service::services[std::make_pair("test_host", "test_svc")] + service::services[std::make_pair("test_host"sv, "test_svc"sv)] ->set_long_plugin_output("test_long_output"); process_macros_r(mac, "$MAXSERVICEATTEMPTS:test_host:test_svc$", out, 1); ASSERT_EQ(out, "3"); @@ -1712,7 +1714,7 @@ TEST_F(MacroService, ServiceGroupNotes) { std::string out; nagios_macros* mac(get_global_macros()); - service::services[std::make_pair("test_host", "test")] + service::services[std::make_pair("test_host"sv, "test"sv)] ->set_long_plugin_output("test_long_output"); process_macros_r(mac, "$SERVICEGROUPNOTES:test_group$", out, 1); ASSERT_EQ(out, "test_notes"); @@ -1911,7 +1913,7 @@ TEST_F(MacroService, ServiceTimeZone) { std::string out; nagios_macros* mac(get_global_macros()); - service::services[std::make_pair("test_host", "test_svc")] + service::services[std::make_pair("test_host"sv, "test_svc"sv)] ->set_long_plugin_output("test_long_output"); process_macros_r(mac, "$SERVICETIMEZONE:test_host:test_svc$", out, 1); ASSERT_EQ(out, "test_time"); @@ -1949,7 +1951,7 @@ TEST_F(MacroService, LastServiceState) { std::string out; nagios_macros* mac(get_global_macros()); - service::services[std::make_pair("test_host", "test_svc")] + service::services[std::make_pair("test_host"sv, "test_svc"sv)] ->set_long_plugin_output("test_long_output"); process_macros_r(mac, "$LASTSERVICESTATE:test_host:test_svc$", out, 1); ASSERT_EQ(out, "OK"); @@ -1987,7 +1989,7 @@ TEST_F(MacroService, LastServiceStateId) { std::string out; nagios_macros* mac(get_global_macros()); - service::services[std::make_pair("test_host", "test_svc")] + service::services[std::make_pair("test_host"sv, "test_svc"sv)] ->set_long_plugin_output("test_long_output"); process_macros_r(mac, "$LASTSERVICESTATEID:test_host:test_svc$", out, 1); ASSERT_EQ(out, "0"); @@ -2095,7 +2097,7 @@ TEST_F(MacroService, LastServiceProblemID) { std::string out; nagios_macros* mac(get_global_macros()); - service::services[std::make_pair("test_host", "test_svc")] + service::services[std::make_pair("test_host"sv, "test_svc"sv)] ->set_long_plugin_output("test_long_output"); process_macros_r(mac, "$LASTSERVICEPROBLEMID:test_host:test_svc$", out, 1); ASSERT_EQ(out, "0"); diff --git a/engine/tests/opentelemetry/agent_check_result_builder_test.cc b/engine/tests/opentelemetry/agent_check_result_builder_test.cc index 7277d8acab0..101ae10c651 100644 --- a/engine/tests/opentelemetry/agent_check_result_builder_test.cc +++ b/engine/tests/opentelemetry/agent_check_result_builder_test.cc @@ -36,8 +36,6 @@ #include "opentelemetry/proto/common/v1/common.pb.h" #include "opentelemetry/proto/metrics/v1/metrics.pb.h" -#include "com/centreon/engine/modules/opentelemetry/data_point_fifo_container.hh" - #include "com/centreon/engine/modules/opentelemetry/otl_check_result_builder.hh" #include "com/centreon/engine/modules/opentelemetry/centreon_agent/agent_check_result_builder.hh" @@ -363,45 +361,11 @@ static const char* agent_exemple = R"( class otl_agent_check_result_builder_test : public TestEngine { protected: - std::shared_ptr _builder_config; - data_point_fifo_container _fifos; + absl::flat_hash_map + _received; public: otl_agent_check_result_builder_test() { - if (service::services.find({"test_host", "test_svc_builder_2"}) == - service::services.end()) { - init_config_state(); - config->contacts().clear(); - configuration::error_cnt err; - - configuration::applier::contact ct_aply; - configuration::contact ctct{new_configuration_contact("admin", true)}; - ct_aply.add_object(ctct); - ct_aply.expand_objects(*config); - ct_aply.resolve_object(ctct, err); - - configuration::host hst{ - new_configuration_host("test_host", "admin", 457)}; - configuration::applier::host hst_aply; - hst_aply.add_object(hst); - - configuration::service svc{new_configuration_service( - "test_host", "test_svc_builder", "admin", 458)}; - configuration::applier::service svc_aply; - svc_aply.add_object(svc); - configuration::service svc2{new_configuration_service( - "test_host", "test_svc_builder_2", "admin", 459)}; - svc_aply.add_object(svc2); - - hst_aply.resolve_object(hst, err); - svc_aply.resolve_object(svc, err); - svc_aply.resolve_object(svc2, err); - } - - _builder_config = - otl_check_result_builder::create_check_result_builder_config( - "--processor=centreon_agent"); - metric_request_ptr request = std::make_shared< ::opentelemetry::proto::collector::metrics::v1:: ExportMetricsServiceRequest>(); @@ -417,29 +381,23 @@ class otl_agent_check_result_builder_test : public TestEngine { break; } } - _fifos.add_data_point("test_host", service_name, - data_pt.get_metric().name(), data_pt); + _received[service_name][data_pt.get_metric().name()].insert(data_pt); }); } }; TEST_F(otl_agent_check_result_builder_test, test_svc_builder) { auto check_result_builder = otl_check_result_builder::create( - "", _builder_config, 1789, *host::hosts.find("test_host")->second, - service::services.find({"test_host", "test_svc_builder"})->second.get(), - std::chrono::system_clock::time_point(), [&](const commands::result&) {}, - spdlog::default_logger()); + "--processor=centreon_agent", spdlog::default_logger()); - commands::result res; - bool success = - check_result_builder->sync_build_result_from_metrics(_fifos, res); + check_result res; + bool success = check_result_builder->build_result_from_metrics( + _received["test_svc_builder"], res); ASSERT_TRUE(success); - ASSERT_EQ(res.exit_code, 0); - ASSERT_EQ(res.exit_status, com::centreon::process::normal); - ASSERT_EQ(res.command_id, 1789); - ASSERT_EQ(res.start_time.to_useconds(), 1718345061381922153 / 1000); - ASSERT_EQ(res.end_time.to_useconds(), 1718345061381922153 / 1000); + ASSERT_EQ(res.get_return_code(), 0); + ASSERT_EQ(res.get_start_time().tv_sec, 1718345061381922153 / 1000000000); + ASSERT_EQ(res.get_finish_time().tv_sec, 1718345061381922153 / 1000000000); auto compare_to_excepted = [](const std::string& to_cmp) -> bool { return to_cmp == @@ -450,26 +408,21 @@ TEST_F(otl_agent_check_result_builder_test, test_svc_builder) { "metric=12;0:50;0:75;;"; }; - ASSERT_PRED1(compare_to_excepted, res.output); + ASSERT_PRED1(compare_to_excepted, res.get_output()); } TEST_F(otl_agent_check_result_builder_test, test_svc_builder_2) { auto check_result_builder = otl_check_result_builder::create( - "", _builder_config, 1789, *host::hosts.find("test_host")->second, - service::services.find({"test_host", "test_svc_builder_2"})->second.get(), - std::chrono::system_clock::time_point(), [&](const commands::result&) {}, - spdlog::default_logger()); + "--processor=centreon_agent", spdlog::default_logger()); - commands::result res; - bool success = - check_result_builder->sync_build_result_from_metrics(_fifos, res); + check_result res; + bool success = check_result_builder->build_result_from_metrics( + _received["test_svc_builder_2"], res); ASSERT_TRUE(success); - ASSERT_EQ(res.exit_code, 0); - ASSERT_EQ(res.exit_status, com::centreon::process::normal); - ASSERT_EQ(res.command_id, 1789); - ASSERT_EQ(res.start_time.to_useconds(), 1718345061713456225 / 1000); - ASSERT_EQ(res.end_time.to_useconds(), 1718345061713456225 / 1000); + ASSERT_EQ(res.get_return_code(), 0); + ASSERT_EQ(res.get_start_time().tv_sec, 1718345061713456225 / 1000000000); + ASSERT_EQ(res.get_finish_time().tv_sec, 1718345061713456225 / 1000000000); auto compare_to_excepted = [](const std::string& to_cmp) -> bool { return to_cmp == @@ -480,5 +433,5 @@ TEST_F(otl_agent_check_result_builder_test, test_svc_builder_2) { "metric=12;@0:50;@~:75;;"; }; - ASSERT_PRED1(compare_to_excepted, res.output); + ASSERT_PRED1(compare_to_excepted, res.get_output()); } diff --git a/engine/tests/opentelemetry/open_telemetry_test.cc b/engine/tests/opentelemetry/open_telemetry_test.cc index 14a91d5854e..76aa3e5045d 100644 --- a/engine/tests/opentelemetry/open_telemetry_test.cc +++ b/engine/tests/opentelemetry/open_telemetry_test.cc @@ -35,6 +35,8 @@ #include #include "com/centreon/common/http/http_server.hh" +#include "com/centreon/engine/checks/checker.hh" +#include "com/centreon/engine/command_manager.hh" #include "com/centreon/engine/configuration/applier/contact.hh" #include "com/centreon/engine/configuration/applier/host.hh" #include "com/centreon/engine/configuration/applier/service.hh" @@ -47,6 +49,7 @@ #include "opentelemetry/proto/common/v1/common.pb.h" #include "opentelemetry/proto/metrics/v1/metrics.pb.h" +#include "com/centreon/engine/commands/otel_connector.hh" #include "com/centreon/engine/modules/opentelemetry/open_telemetry.hh" #include "helper.hh" @@ -59,36 +62,6 @@ extern const char* telegraf_example; extern std::shared_ptr g_io_context; -class open_telemetry - : public com::centreon::engine::modules::opentelemetry::open_telemetry { - protected: - void _create_otl_server( - const grpc_config::pointer& server_conf, - const centreon_agent::agent_config::pointer&) override {} - - public: - open_telemetry(const std::string_view config_file_path, - const std::shared_ptr& io_context, - const std::shared_ptr& logger) - : com::centreon::engine::modules::opentelemetry::open_telemetry( - config_file_path, - io_context, - logger) {} - - void on_metric(const metric_request_ptr& metric) { _on_metric(metric); } - void shutdown() { _shutdown(); } - static std::shared_ptr load( - const std::string_view& config_path, - const std::shared_ptr& io_context, - const std::shared_ptr& logger) { - std::shared_ptr ret = - std::make_shared(config_path, io_context, logger); - ret->_reload(); - ret->_start_second_timer(); - return ret; - } -}; - class open_telemetry_test : public TestEngine { public: commands::otel::host_serv_list::pointer _host_serv_list; @@ -107,7 +80,7 @@ open_telemetry_test::open_telemetry_test() void open_telemetry_test::SetUpTestSuite() { std::ofstream conf_file("/tmp/otel_conf.json"); conf_file << R"({ - "server": { + "otel_server": { "host": "127.0.0.1", "port": 4317 } @@ -159,9 +132,51 @@ void open_telemetry_test::SetUp() { hst_aply.resolve_object(hst, err); svc_aply.resolve_object(svc, err); - data_point_fifo::update_fifo_limit(std::numeric_limits::max(), 10); } void open_telemetry_test::TearDown() { deinit_config_state(); } + +TEST_F(open_telemetry_test, data_available) { + auto instance = open_telemetry::load("/tmp/otel_conf.json", g_io_context, + spdlog::default_logger()); + + std::shared_ptr conn = + commands::otel_connector::create( + "otel_conn", + "--processor=nagios_telegraf --extractor=attributes " + "--host_path=resource_metrics.scope_metrics.data.data_points." + "attributes." + "host " + "--service_path=resource_metrics.scope_metrics.data.data_points." + "attributes.service", + nullptr); + conn->register_host_serv("localhost", "check_icmp"); + + metric_request_ptr request = + std::make_shared<::opentelemetry::proto::collector::metrics::v1:: + ExportMetricsServiceRequest>(); + ::google::protobuf::util::JsonStringToMessage(telegraf_example, + request.get()); + instance->on_metric(request); + command_manager::instance().execute(); + + bool checked = false; + checks::checker::instance().inspect_reap_partial( + [&checked](const std::deque& queue) { + ASSERT_FALSE(queue.empty()); + check_result::pointer res = *queue.rbegin(); + ASSERT_EQ(res->get_start_time().tv_sec, 1707744430); + ASSERT_EQ(res->get_finish_time().tv_sec, 1707744430); + ASSERT_TRUE(res->get_exited_ok()); + ASSERT_EQ(res->get_return_code(), 0); + ASSERT_EQ( + res->get_output(), + "OK|pl=0%;0:40;0:80;; rta=0.022ms;0:200;0:500;0; rtmax=0.071ms;;;; " + "rtmin=0.008ms;;;;"); + checked = true; + }); + + ASSERT_TRUE(checked); +} diff --git a/engine/tests/opentelemetry/opentelemetry_test.cc b/engine/tests/opentelemetry/opentelemetry_test.cc deleted file mode 100644 index 19d385c9214..00000000000 --- a/engine/tests/opentelemetry/opentelemetry_test.cc +++ /dev/null @@ -1,263 +0,0 @@ -/** - * Copyright 2024 Centreon - * - * This file is part of Centreon Engine. - * - * Centreon Engine is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * Centreon Engine is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Centreon Engine. If not, see - * . - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include "com/centreon/common/http/http_server.hh" -#include "com/centreon/engine/configuration/applier/contact.hh" -#include "com/centreon/engine/configuration/applier/host.hh" -#include "com/centreon/engine/configuration/applier/service.hh" -#include "common/engine_legacy_conf/host.hh" -#include "common/engine_legacy_conf/service.hh" - -#include "opentelemetry/proto/collector/metrics/v1/metrics_service.pb.h" -#include "opentelemetry/proto/common/v1/common.pb.h" -#include "opentelemetry/proto/metrics/v1/metrics.pb.h" - -#include "com/centreon/engine/modules/opentelemetry/opentelemetry.hh" - -#include "helper.hh" -#include "test_engine.hh" - -using namespace com::centreon::engine::modules::opentelemetry; -using namespace com::centreon::engine; - -extern const char* telegraf_example; - -extern std::shared_ptr g_io_context; - -class open_telemetry - : public com::centreon::engine::modules::opentelemetry::open_telemetry { - protected: - void _create_otl_server(const grpc_config::pointer& server_conf) override {} - - public: - open_telemetry(const std::string_view config_file_path, - const std::shared_ptr& io_context, - const std::shared_ptr& logger) - : com::centreon::engine::modules::opentelemetry::open_telemetry( - config_file_path, - io_context, - logger) {} - - void on_metric(const metric_request_ptr& metric) { _on_metric(metric); } - void shutdown() { _shutdown(); } - static std::shared_ptr load( - const std::string_view& config_path, - const std::shared_ptr& io_context, - const std::shared_ptr& logger) { - std::shared_ptr ret = - std::make_shared(config_path, io_context, logger); - ret->_reload(); - ret->_start_second_timer(); - return ret; - } -}; - -class open_telemetry_test : public TestEngine { - public: - commands::otel::host_serv_list::pointer _host_serv_list; - - open_telemetry_test(); - static void SetUpTestSuite(); - void SetUp() override; - void TearDown() override; -}; - -open_telemetry_test::open_telemetry_test() - : _host_serv_list(std::make_shared()) { - _host_serv_list->register_host_serv("localhost", "check_icmp"); -} - -void open_telemetry_test::SetUpTestSuite() { - std::ofstream conf_file("/tmp/otel_conf.json"); - conf_file << R"({ - "otel_server": { - "host": "127.0.0.1", - "port": 4317 - } -} -)"; - conf_file.close(); - // spdlog::default_logger()->set_level(spdlog::level::trace); -} - -void open_telemetry_test::SetUp() { - init_config_state(); - config->contacts().clear(); - configuration::applier::contact ct_aply; - configuration::contact ctct{new_configuration_contact("admin", true)}; - ct_aply.add_object(ctct); - ct_aply.expand_objects(*config); - ct_aply.resolve_object(ctct); - - configuration::host hst{new_configuration_host("localhost", "admin")}; - configuration::applier::host hst_aply; - hst_aply.add_object(hst); - - configuration::service svc{ - new_configuration_service("localhost", "check_icmp", "admin")}; - configuration::applier::service svc_aply; - svc_aply.add_object(svc); - - hst_aply.resolve_object(hst); - svc_aply.resolve_object(svc); - data_point_fifo::update_fifo_limit(std::numeric_limits::max(), 10); -} - -void open_telemetry_test::TearDown() { - deinit_config_state(); -} - -TEST_F(open_telemetry_test, data_available) { - auto instance = ::open_telemetry::load("/tmp/otel_conf.json", g_io_context, - spdlog::default_logger()); - - instance->create_extractor( - "--extractor=attributes " - "--host_path=resource_metrics.scope_metrics.data.data_points.attributes." - "host " - "--service_path=resource_metrics.scope_metrics.data.data_points." - "attributes.service", - _host_serv_list); - - metric_request_ptr request = - std::make_shared<::opentelemetry::proto::collector::metrics::v1:: - ExportMetricsServiceRequest>(); - ::google::protobuf::util::JsonStringToMessage(telegraf_example, - request.get()); - instance->on_metric(request); - // data are now available - commands::result res; - nagios_macros macros; - macros.host_ptr = host::hosts.begin()->second.get(); - macros.service_ptr = service::services.begin()->second.get(); - ASSERT_TRUE(instance->check("nagios_telegraf", - instance->create_check_result_builder_config( - "--processor=nagios_telegraf"), - 1, macros, 1, res, - [](const commands::result&) {})); - ASSERT_EQ(res.command_id, 1); - ASSERT_EQ(res.start_time.to_useconds(), 1707744430000000); - ASSERT_EQ(res.end_time.to_useconds(), 1707744430000000); - ASSERT_EQ(res.exit_code, 0); - ASSERT_EQ(res.exit_status, com::centreon::process::normal); - ASSERT_EQ(res.output, - "OK|pl=0%;0:40;0:80;; rta=0.022ms;0:200;0:500;0; rtmax=0.071ms;;;; " - "rtmin=0.008ms;;;;"); -} - -TEST_F(open_telemetry_test, timeout) { - auto instance = ::open_telemetry::load("/tmp/otel_conf.json", g_io_context, - spdlog::default_logger()); - - instance->create_extractor( - "--extractor=attributes " - "--host_path=resource_metrics.scope_metrics.data.data_points.attributes." - "host " - "--service_path=resource_metrics.scope_metrics.data.data_points." - "attributes.service", - _host_serv_list); - - commands::result res; - res.exit_status = com::centreon::process::normal; - nagios_macros macros; - macros.host_ptr = host::hosts.begin()->second.get(); - macros.service_ptr = service::services.begin()->second.get(); - std::condition_variable cv; - std::mutex cv_m; - ASSERT_FALSE(instance->check("nagios_telegraf", - instance->create_check_result_builder_config( - "--processor=nagios_telegraf"), - 1, macros, 1, res, - [&res, &cv](const commands::result& async_res) { - res = async_res; - cv.notify_one(); - })); - - std::unique_lock l(cv_m); - ASSERT_EQ(cv.wait_for(l, std::chrono::seconds(3)), - std::cv_status::no_timeout); - ASSERT_EQ(res.exit_status, com::centreon::process::timeout); -} - -TEST_F(open_telemetry_test, wait_for_data) { - auto instance = ::open_telemetry::load("/tmp/otel_conf.json", g_io_context, - spdlog::default_logger()); - - static const std::string otl_conf = - "--processor=nagios_telegraf " - "--extractor=attributes " - "--host_path=resource_metrics.scope_metrics.data.data_points.attributes." - "host " - "--service_path=resource_metrics.scope_metrics.data.data_points." - "attributes.service"; - - instance->create_extractor(otl_conf, _host_serv_list); - - commands::result res; - res.exit_status = com::centreon::process::normal; - nagios_macros macros; - macros.host_ptr = host::hosts.begin()->second.get(); - macros.service_ptr = service::services.begin()->second.get(); - std::mutex cv_m; - std::condition_variable cv; - bool data_available = instance->check( - "nagios_telegraf", instance->create_check_result_builder_config(otl_conf), - 1, macros, 1, res, [&res, &cv](const commands::result& async_res) { - res = async_res; - cv.notify_one(); - }); - ASSERT_FALSE(data_available); - - metric_request_ptr request = - std::make_shared<::opentelemetry::proto::collector::metrics::v1:: - ExportMetricsServiceRequest>(); - ::google::protobuf::util::JsonStringToMessage(telegraf_example, - request.get()); - std::thread t([instance, request]() { instance->on_metric(request); }); - - std::unique_lock l(cv_m); - ASSERT_EQ(cv.wait_for(l, std::chrono::seconds(1)), - std::cv_status::no_timeout); - ASSERT_EQ(res.command_id, 1); - ASSERT_EQ(res.start_time.to_useconds(), 1707744430000000); - ASSERT_EQ(res.end_time.to_useconds(), 1707744430000000); - ASSERT_EQ(res.exit_code, 0); - ASSERT_EQ(res.exit_status, com::centreon::process::normal); - ASSERT_EQ(res.output, - "OK|pl=0%;0:40;0:80;; rta=0.022ms;0:200;0:500;0; rtmax=0.071ms;;;; " - "rtmin=0.008ms;;;;"); - t.join(); -} diff --git a/engine/tests/opentelemetry/otl_converter_test.cc b/engine/tests/opentelemetry/otl_converter_test.cc index a26615232ff..17037a4ce88 100644 --- a/engine/tests/opentelemetry/otl_converter_test.cc +++ b/engine/tests/opentelemetry/otl_converter_test.cc @@ -37,7 +37,6 @@ #include "opentelemetry/proto/common/v1/common.pb.h" #include "opentelemetry/proto/metrics/v1/metrics.pb.h" -#include "com/centreon/engine/modules/opentelemetry/data_point_fifo_container.hh" #include "com/centreon/engine/modules/opentelemetry/otl_check_result_builder.hh" #include "com/centreon/engine/modules/opentelemetry/telegraf/nagios_check_result_builder.hh" @@ -47,76 +46,13 @@ using namespace com::centreon::engine::modules::opentelemetry; using namespace com::centreon::engine; -class otl_converter_test : public TestEngine { - public: - void SetUp() override; - void TearDown() override; -}; +class otl_converter_test : public TestEngine {}; -void otl_converter_test::SetUp() { - configuration::error_cnt err; - init_config_state(); - timeperiod::timeperiods.clear(); - contact::contacts.clear(); - host::hosts.clear(); - host::hosts_by_id.clear(); - service::services.clear(); - service::services_by_id.clear(); -#ifdef LEGACY_CONF - config->contacts().clear(); -#else - pb_config.mutable_contacts()->Clear(); -#endif - configuration::applier::contact ct_aply; -#ifdef LEGACY_CONF - configuration::contact ctct{new_configuration_contact("admin", true)}; -#else - configuration::Contact ctct{new_pb_configuration_contact("admin", true)}; -#endif - ct_aply.add_object(ctct); -#ifdef LEGACY_CONF - ct_aply.expand_objects(*config); -#else - ct_aply.expand_objects(pb_config); -#endif - ct_aply.resolve_object(ctct, err); - -#ifdef LEGACY_CONF - configuration::host hst{new_configuration_host("localhost", "admin")}; -#else - configuration::Host hst{new_pb_configuration_host("localhost", "admin")}; -#endif - configuration::applier::host hst_aply; - hst_aply.add_object(hst); - -#ifdef LEGACY_CONF - configuration::service svc{ - new_configuration_service("localhost", "check_icmp", "admin")}; -#else - configuration::Service svc{ - new_pb_configuration_service("localhost", "check_icmp", "admin")}; -#endif - configuration::applier::service svc_aply; - svc_aply.add_object(svc); - - hst_aply.resolve_object(hst, err); - svc_aply.resolve_object(svc, err); - data_point_fifo::update_fifo_limit(std::numeric_limits::max(), 10); -} - -void otl_converter_test::TearDown() { - deinit_config_state(); -} - -TEST_F(otl_converter_test, empty_fifo) { - data_point_fifo_container empty; - telegraf::nagios_check_result_builder conv( - "", 1, *host::hosts.begin()->second, - service::services.begin()->second.get(), - std::chrono::system_clock::time_point(), [&](const commands::result&) {}, - spdlog::default_logger()); - commands::result res; - ASSERT_FALSE(conv.sync_build_result_from_metrics(empty, res)); +TEST_F(otl_converter_test, empty_metrics) { + telegraf::nagios_check_result_builder conv("", spdlog::default_logger()); + metric_to_datapoints empty; + check_result res; + ASSERT_FALSE(conv.build_result_from_metrics(empty, res)); } const char* telegraf_example = R"( @@ -603,38 +539,30 @@ const char* telegraf_example = R"( )"; TEST_F(otl_converter_test, nagios_telegraf) { - data_point_fifo_container received; metric_request_ptr request = std::make_shared< ::opentelemetry::proto::collector::metrics::v1:: ExportMetricsServiceRequest>(); ::google::protobuf::util::JsonStringToMessage(telegraf_example, request.get()); + metric_to_datapoints received; otl_data_point::extract_data_points( request, [&](const otl_data_point& data_pt) { - received.add_data_point("localhost", "check_icmp", - data_pt.get_metric().name(), data_pt); + received[data_pt.get_metric().name()].insert(data_pt); }); - telegraf::nagios_check_result_builder conv( - "", 1, *host::hosts.begin()->second, - service::services.begin()->second.get(), - std::chrono::system_clock::time_point(), [&](const commands::result&) {}, - spdlog::default_logger()); - commands::result res; - ASSERT_TRUE(conv.sync_build_result_from_metrics(received, res)); - ASSERT_EQ(res.command_id, 1); - ASSERT_EQ(res.start_time.to_useconds(), 1707744430000000); - ASSERT_EQ(res.end_time.to_useconds(), 1707744430000000); - ASSERT_EQ(res.exit_code, 0); - ASSERT_EQ(res.exit_status, com::centreon::process::normal); - ASSERT_EQ(res.output, + telegraf::nagios_check_result_builder conv("", spdlog::default_logger()); + check_result res; + ASSERT_TRUE(conv.build_result_from_metrics(received, res)); + ASSERT_EQ(res.get_start_time().tv_sec, 1707744430); + ASSERT_EQ(res.get_finish_time().tv_sec, 1707744430); + ASSERT_EQ(res.get_return_code(), 0); + ASSERT_EQ(res.get_output(), "OK|pl=0%;0:40;0:80;; rta=0.022ms;0:200;0:500;0; rtmax=0.071ms;;;; " "rtmin=0.008ms;;;;"); } TEST_F(otl_converter_test, nagios_telegraf_le_ge) { - data_point_fifo_container received; metric_request_ptr request = std::make_shared< ::opentelemetry::proto::collector::metrics::v1:: ExportMetricsServiceRequest>(); @@ -646,32 +574,25 @@ TEST_F(otl_converter_test, nagios_telegraf_le_ge) { ::google::protobuf::util::JsonStringToMessage(example, request.get()); + metric_to_datapoints received; otl_data_point::extract_data_points( request, [&](const otl_data_point& data_pt) { - received.add_data_point("localhost", "check_icmp", - data_pt.get_metric().name(), data_pt); + received[data_pt.get_metric().name()].insert(data_pt); }); - telegraf::nagios_check_result_builder conv( - "", 1, *host::hosts.begin()->second, - service::services.begin()->second.get(), - std::chrono::system_clock::time_point(), [&](const commands::result&) {}, - spdlog::default_logger()); - commands::result res; - ASSERT_TRUE(conv.sync_build_result_from_metrics(received, res)); - ASSERT_EQ(res.command_id, 1); - ASSERT_EQ(res.start_time.to_useconds(), 1707744430000000); - ASSERT_EQ(res.end_time.to_useconds(), 1707744430000000); - ASSERT_EQ(res.exit_code, 0); - ASSERT_EQ(res.exit_status, com::centreon::process::normal); + telegraf::nagios_check_result_builder conv("", spdlog::default_logger()); + check_result res; + ASSERT_TRUE(conv.build_result_from_metrics(received, res)); + ASSERT_EQ(res.get_start_time().tv_sec, 1707744430); + ASSERT_EQ(res.get_finish_time().tv_sec, 1707744430); + ASSERT_EQ(res.get_return_code(), 0); ASSERT_EQ( - res.output, + res.get_output(), "OK|pl=0%;0:40;@0:80;; rta=0.022ms;0:200;@0:500;0; rtmax=0.071ms;;;; " "rtmin=0.008ms;;;;"); } TEST_F(otl_converter_test, nagios_telegraf_max) { - data_point_fifo_container received; metric_request_ptr request = std::make_shared< ::opentelemetry::proto::collector::metrics::v1:: ExportMetricsServiceRequest>(); @@ -680,25 +601,19 @@ TEST_F(otl_converter_test, nagios_telegraf_max) { ::google::protobuf::util::JsonStringToMessage(example, request.get()); + metric_to_datapoints received; otl_data_point::extract_data_points( request, [&](const otl_data_point& data_pt) { - received.add_data_point("localhost", "check_icmp", - data_pt.get_metric().name(), data_pt); + received[data_pt.get_metric().name()].insert(data_pt); }); - telegraf::nagios_check_result_builder conv( - "", 1, *host::hosts.begin()->second, - service::services.begin()->second.get(), - std::chrono::system_clock::time_point(), [&](const commands::result&) {}, - spdlog::default_logger()); - commands::result res; - ASSERT_TRUE(conv.sync_build_result_from_metrics(received, res)); - ASSERT_EQ(res.command_id, 1); - ASSERT_EQ(res.start_time.to_useconds(), 1707744430000000); - ASSERT_EQ(res.end_time.to_useconds(), 1707744430000000); - ASSERT_EQ(res.exit_code, 0); - ASSERT_EQ(res.exit_status, com::centreon::process::normal); - ASSERT_EQ(res.output, + telegraf::nagios_check_result_builder conv("", spdlog::default_logger()); + check_result res; + ASSERT_TRUE(conv.build_result_from_metrics(received, res)); + ASSERT_EQ(res.get_start_time().tv_sec, 1707744430); + ASSERT_EQ(res.get_finish_time().tv_sec, 1707744430); + ASSERT_EQ(res.get_return_code(), 0); + ASSERT_EQ(res.get_output(), "OK|pl=0%;0:40;0:80;; rta=0.022ms;0:200;0:500;;0 rtmax=0.071ms;;;; " "rtmin=0.008ms;;;;"); } diff --git a/tests/broker-engine/opentelemetry.robot b/tests/broker-engine/opentelemetry.robot index 5ccc6da9eed..b2df58bb231 100644 --- a/tests/broker-engine/opentelemetry.robot +++ b/tests/broker-engine/opentelemetry.robot @@ -89,6 +89,7 @@ BEOTEL_TELEGRAF_CHECK_HOST ... OTEL connector ... opentelemetry --processor=nagios_telegraf --extractor=attributes --host_path=resource_metrics.scope_metrics.data.data_points.attributes.host --service_path=resource_metrics.scope_metrics.data.data_points.attributes.service Ctn Engine Config Replace Value In Hosts ${0} host_1 check_command otel_check_icmp + Ctn Set Hosts Passive ${0} host_1 Ctn Engine Config Add Command ... ${0} ... otel_check_icmp @@ -117,28 +118,10 @@ BEOTEL_TELEGRAF_CHECK_HOST ${resources_list} Ctn Create Otl Request ${0} host_1 - # check without feed - ${start} Ctn Get Round Current Date - Ctn Schedule Forced Host Check host_1 - ${result} Ctn Check Host Output Resource Status With Timeout - ... host_1 - ... 35 - ... ${start} - ... 0 - ... HARD - ... (No output returned from host check) - Should Be True ${result} hosts table not updated - - Log To Console export metrics - Ctn Send Otl To Engine 4317 ${resources_list} - - Sleep 5 - - # feed and check ${start} Ctn Get Round Current Date - Ctn Schedule Forced Host Check host_1 + Ctn Send Otl To Engine 4317 ${resources_list} ${result} Ctn Check Host Output Resource Status With Timeout host_1 30 ${start} 0 HARD OK Should Be True ${result} hosts table not updated @@ -149,30 +132,23 @@ BEOTEL_TELEGRAF_CHECK_HOST Sleep 5 - - # feed and check - ${start} Ctn Get Round Current Date - Ctn Schedule Forced Host Check host_1 - - ${result} Ctn Check Host Check Status With Timeout host_1 30 ${start} 0 OK - Should Be True ${result} hosts table not updated - # check then feed, three times to modify hard state ${start} Ctn Get Round Current Date - Ctn Schedule Forced Host Check host_1 Sleep 2 ${resources_list} Ctn Create Otl Request ${2} host_1 Ctn Send Otl To Engine 4317 ${resources_list} - Ctn Schedule Forced Host Check host_1 - Sleep 2 + + ${result} Ctn Check Host Output Resource Status With Timeout host_1 30 ${start} 1 SOFT CRITICAL + Should Be True ${result} hosts table not updated + ${resources_list} Ctn Create Otl Request ${2} host_1 Ctn Send Otl To Engine 4317 ${resources_list} - Ctn Schedule Forced Host Check host_1 + + Sleep 2 ${resources_list} Ctn Create Otl Request ${2} host_1 Ctn Send Otl To Engine 4317 ${resources_list} - ${result} Ctn Check Host Check Status With Timeout host_1 30 ${start} 1 CRITICAL - + ${result} Ctn Check Host Output Resource Status With Timeout host_1 30 ${start} 1 HARD CRITICAL Should Be True ${result} hosts table not updated BEOTEL_TELEGRAF_CHECK_SERVICE @@ -185,6 +161,7 @@ BEOTEL_TELEGRAF_CHECK_SERVICE ... OTEL connector ... opentelemetry --processor=nagios_telegraf --extractor=attributes --host_path=resource_metrics.scope_metrics.data.data_points.attributes.host --service_path=resource_metrics.scope_metrics.data.data_points.attributes.service Ctn Engine Config Replace Value In Services ${0} service_1 check_command otel_check_icmp + Ctn Set Services Passive 0 service_1 Ctn Engine Config Add Command ... ${0} ... otel_check_icmp @@ -213,60 +190,29 @@ BEOTEL_TELEGRAF_CHECK_SERVICE ${resources_list} Ctn Create Otl Request ${0} host_1 service_1 - # check without feed - - ${start} Ctn Get Round Current Date - Ctn Schedule Forced Svc Check host_1 service_1 - ${result} Ctn Check Service Output Resource Status With Timeout - ... host_1 - ... service_1 - ... 35 - ... ${start} - ... 0 - ... HARD - ... (No output returned from plugin) - Should Be True ${result} services table not updated - - Log To Console export metrics - Ctn Send Otl To Engine 4317 ${resources_list} - - Sleep 5 - # feed and check ${start} Ctn Get Round Current Date - Ctn Schedule Forced Svc Check host_1 service_1 - - ${result} Ctn Check Service Output Resource Status With Timeout host_1 service_1 30 ${start} 0 HARD OK - Should Be True ${result} services table not updated - Log To Console export metrics Ctn Send Otl To Engine 4317 ${resources_list} - Sleep 5 - - # feed and check - ${start} Ctn Get Round Current Date - Ctn Schedule Forced Svc Check host_1 service_1 - - ${result} Ctn Check Service Check Status With Timeout host_1 service_1 30 ${start} 0 OK + ${result} Ctn Check Service Output Resource Status With Timeout host_1 service_1 30 ${start} 0 HARD OK Should Be True ${result} services table not updated # check then feed, three times to modify hard state ${start} Ctn Get Round Current Date ${resources_list} Ctn Create Otl Request ${2} host_1 service_1 Ctn Send Otl To Engine 4317 ${resources_list} - Sleep 2 - Ctn Schedule Forced Svc Check host_1 service_1 + + ${result} Ctn Check Service Output Resource Status With Timeout host_1 service_1 30 ${start} 2 SOFT CRITICAL + Should Be True ${result} services table not updated + ${resources_list} Ctn Create Otl Request ${2} host_1 service_1 Ctn Send Otl To Engine 4317 ${resources_list} + Sleep 2 - Ctn Schedule Forced Svc Check host_1 service_1 ${resources_list} Ctn Create Otl Request ${2} host_1 service_1 Ctn Send Otl To Engine 4317 ${resources_list} - Sleep 2 - Ctn Schedule Forced Svc Check host_1 service_1 ${result} Ctn Check Service Output Resource Status With Timeout host_1 service_1 30 ${start} 2 HARD CRITICAL - Should Be True ${result} services table not updated BEOTEL_SERVE_TELEGRAF_CONFIGURATION_CRYPTED @@ -276,7 +222,7 @@ BEOTEL_SERVE_TELEGRAF_CONFIGURATION_CRYPTED Ctn Config Engine ${1} ${3} ${2} Ctn Add Otl ServerModule ... 0 - ... {"otel_server":{"host": "0.0.0.0","port": 4317},"max_length_grpc_log":0, "telegraf_conf_server": {"http_server":{"port": 1443, "encryption": true, "certificate_path": "/tmp/otel/server.crt", "key_path": "/tmp/otel/server.key"}, "cehck_interval":60, "engine_otel_endpoint": "127.0.0.1:4317"}} + ... {"otel_server":{"host": "0.0.0.0","port": 4317},"max_length_grpc_log":0, "telegraf_conf_server": {"http_server":{"port": 1443, "encryption": true, "certificate_path": "/tmp/otel/server.crt", "key_path": "/tmp/otel/server.key"}, "check_interval":60, "engine_otel_endpoint": "127.0.0.1:4317"}} Ctn Config Add Otl Connector ... 0 ... OTEL connector @@ -431,6 +377,7 @@ BEOTEL_CENTREON_AGENT_CHECK_HOST ... OTEL connector ... opentelemetry --processor=centreon_agent --extractor=attributes --host_path=resource_metrics.resource.attributes.host.name --service_path=resource_metrics.resource.attributes.service.name Ctn Engine Config Replace Value In Hosts ${0} host_1 check_command otel_check_icmp + Ctn Set Hosts Passive ${0} host_1 Ctn Engine Config Add Command ... ${0} ... otel_check_icmp @@ -449,6 +396,7 @@ BEOTEL_CENTREON_AGENT_CHECK_HOST Ctn Clear Retention ${start} Get Current Date + ${start_int} Ctn Get Round Current Date Ctn Start Broker Ctn Start Engine Ctn Start Agent @@ -458,16 +406,7 @@ BEOTEL_CENTREON_AGENT_CHECK_HOST ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 10 Should Be True ${result} "unencrypted server listening on 0.0.0.0:4317" should be available. - # We wait for the connection with the agent before scheduling the check, otherwise the delay between the scheduling and the real time of the check is too long and makes the test to fail. - ${start} Ctn Get Round Current Date - ${content} Create List connected with agent - ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 30 - Should Be True ${result} Engine seems not connected to the agent - Sleep 10s - - Ctn Schedule Forced Host Check host_1 - - ${result} Ctn Check Host Check Status With Timeout host_1 30 ${start} 0 OK - 127.0.0.1 + ${result} Ctn Check Host Check Status With Timeout host_1 30 ${start_int} 0 OK - 127.0.0.1 Should Be True ${result} hosts table not updated Ctn Engine Config Replace Value In Hosts ${0} host_1 check_command otel_check_icmp_2 @@ -487,9 +426,6 @@ BEOTEL_CENTREON_AGENT_CHECK_HOST ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 22 Should Be True ${result} "description: "OK check2" should be available. - ${start} Ctn Get Round Current Date - Ctn Schedule Forced Host Check host_1 - ${result} Ctn Check Host Check Status With Timeout host_1 30 ${start} 0 OK check2 - 127.0.0.1: rta 0,010ms, lost 0% Should Be True ${result} hosts table not updated @@ -506,6 +442,7 @@ BEOTEL_CENTREON_AGENT_CHECK_SERVICE ... OTEL connector ... opentelemetry --processor=centreon_agent --extractor=attributes --host_path=resource_metrics.resource.attributes.host.name --service_path=resource_metrics.resource.attributes.service.name Ctn Engine Config Replace Value In Services ${0} service_1 check_command otel_check + Ctn Set Services Passive 0 service_1 Ctn Engine Config Add Command ... ${0} ... otel_check @@ -527,6 +464,7 @@ BEOTEL_CENTREON_AGENT_CHECK_SERVICE Ctn Clear Retention ${start} Ctn Get Round Current Date + ${start_int} Ctn Get Round Current Date Ctn Start Broker Ctn Start Engine Ctn Start Agent @@ -536,25 +474,13 @@ BEOTEL_CENTREON_AGENT_CHECK_SERVICE ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 10 Should Be True ${result} "unencrypted server listening on 0.0.0.0:4317" should be available. - ${content} Create List fifos:{"host_1,service_1" - ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 30 - Should Be True ${result} fifos not found in logs - - Ctn Schedule Forced Svc Check host_1 service_1 - - ${result} Ctn Check Service Check Status With Timeout host_1 service_1 60 ${start} 2 Test check 456 + ${result} Ctn Check Service Check Status With Timeout host_1 service_1 60 ${start_int} 2 Test check 456 Should Be True ${result} services table not updated ${start} Ctn Get Round Current Date #service_1 check ok Ctn Set Command Status 456 ${0} - ${content} Create List as_int: 0 - ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 30 - Should Be True ${result} status 0 not found in logs - - Ctn Schedule Forced Svc Check host_1 service_1 - ${result} Ctn Check Service Check Status With Timeout host_1 service_1 60 ${start} 0 Test check 456 Should Be True ${result} services table not updated @@ -571,6 +497,7 @@ BEOTEL_REVERSE_CENTREON_AGENT_CHECK_HOST ... OTEL connector ... opentelemetry --processor=centreon_agent --extractor=attributes --host_path=resource_metrics.resource.attributes.host.name --service_path=resource_metrics.resource.attributes.service.name Ctn Engine Config Replace Value In Hosts ${0} host_1 check_command otel_check_icmp + Ctn Set Hosts Passive ${0} host_1 Ctn Engine Config Add Command ... ${0} ... otel_check_icmp @@ -589,6 +516,7 @@ BEOTEL_REVERSE_CENTREON_AGENT_CHECK_HOST Ctn Clear Retention ${start} Get Current Date + ${start_int} Ctn Get Round Current Date Ctn Start Broker Ctn Start Engine Ctn Start Agent @@ -597,12 +525,8 @@ BEOTEL_REVERSE_CENTREON_AGENT_CHECK_HOST ${content} Create List init from [.\\s]*127.0.0.1:4317 ${result} Ctn Find Regex In Log With Timeout ${engineLog0} ${start} ${content} 10 Should Be True ${result} "init from localhost:4317" not found in log - Sleep 1 - - ${start} Ctn Get Round Current Date - Ctn Schedule Forced Host Check host_1 - ${result} Ctn Check Host Check Status With Timeout host_1 30 ${start} 0 OK - 127.0.0.1 + ${result} Ctn Check Host Check Status With Timeout host_1 30 ${start_int} 0 OK - 127.0.0.1 Should Be True ${result} hosts table not updated Ctn Engine Config Replace Value In Hosts ${0} host_1 check_command otel_check_icmp_2 @@ -622,9 +546,6 @@ BEOTEL_REVERSE_CENTREON_AGENT_CHECK_HOST ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 30 Should Be True ${result} "description: "OK check2" should be available. - ${start} Ctn Get Round Current Date - Ctn Schedule Forced Host Check host_1 - ${result} Ctn Check Host Check Status With Timeout host_1 30 ${start} 0 OK check2 - 127.0.0.1: rta 0,010ms, lost 0% Should Be True ${result} hosts table not updated @@ -641,6 +562,7 @@ BEOTEL_REVERSE_CENTREON_AGENT_CHECK_SERVICE ... OTEL connector ... opentelemetry --processor=centreon_agent --extractor=attributes --host_path=resource_metrics.resource.attributes.host.name --service_path=resource_metrics.resource.attributes.service.name Ctn Engine Config Replace Value In Services ${0} service_1 check_command otel_check + Ctn Set Services Passive 0 service_1 Ctn Engine Config Add Command ... ${0} ... otel_check @@ -662,6 +584,7 @@ BEOTEL_REVERSE_CENTREON_AGENT_CHECK_SERVICE Ctn Clear Retention ${start} Ctn Get Round Current Date + ${start_int} Ctn Get Round Current Date Ctn Start Broker Ctn Start Engine Ctn Start Agent @@ -672,25 +595,13 @@ BEOTEL_REVERSE_CENTREON_AGENT_CHECK_SERVICE Should Be True ${result} "init from 127.0.0.1:4317" not found in log - ${content} Create List fifos:{"host_1,service_1" - ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 30 - Should Be True ${result} fifos not found in logs - - Ctn Schedule Forced Svc Check host_1 service_1 - - ${result} Ctn Check Service Check Status With Timeout host_1 service_1 60 ${start} 2 Test check 456 + ${result} Ctn Check Service Check Status With Timeout host_1 service_1 60 ${start_int} 2 Test check 456 Should Be True ${result} services table not updated ${start} Ctn Get Round Current Date #service_1 check ok Ctn Set Command Status 456 ${0} - ${content} Create List as_int: 0 - ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 30 - Should Be True ${result} status 0 not found in logs - - Ctn Schedule Forced Svc Check host_1 service_1 - ${result} Ctn Check Service Check Status With Timeout host_1 service_1 60 ${start} 0 Test check 456 Should Be True ${result} services table not updated @@ -709,6 +620,7 @@ BEOTEL_CENTREON_AGENT_CHECK_HOST_CRYPTED ... OTEL connector ... opentelemetry --processor=centreon_agent --extractor=attributes --host_path=resource_metrics.resource.attributes.host.name --service_path=resource_metrics.resource.attributes.service.name Ctn Engine Config Replace Value In Hosts ${0} host_1 check_command otel_check_icmp + Ctn Set Hosts Passive ${0} host_1 Ctn Engine Config Add Command ... ${0} ... otel_check_icmp @@ -727,6 +639,7 @@ BEOTEL_CENTREON_AGENT_CHECK_HOST_CRYPTED Ctn Clear Retention ${start} Get Current Date + ${start_int} Ctn Get Round Current Date Ctn Start Broker Ctn Start Engine Ctn Start Agent @@ -736,16 +649,7 @@ BEOTEL_CENTREON_AGENT_CHECK_HOST_CRYPTED ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 10 Should Be True ${result} "encrypted server listening on 0.0.0.0:4317" should be available. - # We wait for the connection with the agent before scheduling the check, otherwise the delay between the scheduling and the real time of the check is too long and makes the test to fail. - ${start} Ctn Get Round Current Date - ${content} Create List connected with agent - ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 10 - Should Be True ${result} Engine seems not connected to the agent - Sleep 10s - - Ctn Schedule Forced Host Check host_1 - - ${result} Ctn Check Host Check Status With Timeout host_1 30 ${start} 0 OK - 127.0.0.1 + ${result} Ctn Check Host Check Status With Timeout host_1 30 ${start_int} 0 OK - 127.0.0.1 Should Be True ${result} hosts table not updated @@ -767,6 +671,7 @@ BEOTEL_REVERSE_CENTREON_AGENT_CHECK_HOST_CRYPTED ... OTEL connector ... opentelemetry --processor=centreon_agent --extractor=attributes --host_path=resource_metrics.resource.attributes.host.name --service_path=resource_metrics.resource.attributes.service.name Ctn Engine Config Replace Value In Hosts ${0} host_1 check_command otel_check_icmp + Ctn Set Hosts Passive ${0} host_1 Ctn Engine Config Add Command ... ${0} ... otel_check_icmp @@ -785,20 +690,18 @@ BEOTEL_REVERSE_CENTREON_AGENT_CHECK_HOST_CRYPTED Ctn Clear Retention ${start} Get Current Date + ${start_int} Ctn Get Round Current Date Ctn Start Broker Ctn Start Engine Ctn Start Agent # Let's wait for engine to connect to agent ${content} Create List init from localhost:4317 - ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 10 + ${result} Ctn Find In Log With Timeout ${engineLog0} ${start} ${content} 15 Should Be True ${result} "init from localhost:4317" not found in log Sleep 1 - ${start} Ctn Get Round Current Date - Ctn Schedule Forced Host Check host_1 - - ${result} Ctn Check Host Check Status With Timeout host_1 30 ${start} 0 OK - 127.0.0.1 + ${result} Ctn Check Host Check Status With Timeout host_1 30 ${start_int} 0 OK - 127.0.0.1 Should Be True ${result} hosts table not updated diff --git a/tests/resources/Engine.py b/tests/resources/Engine.py index 3837cffba19..7213a4558b1 100755 --- a/tests/resources/Engine.py +++ b/tests/resources/Engine.py @@ -3552,7 +3552,7 @@ def ctn_add_data_point_to_metric(metric, attrib:dict, metric_value = None): """ data_point = metric.gauge.data_points.add() - data_point.time_unix_nano = int(time.time()) + data_point.time_unix_nano = int(time.time()) * 1000000000 if metric_value is not None: data_point.as_double = metric_value else: