From ac6ff835fc5691eb56e526af7dfeaf3558b42c0b Mon Sep 17 00:00:00 2001 From: Malte Isberner <2822367+misberner@users.noreply.github.com> Date: Tue, 23 Jun 2020 19:44:46 +0200 Subject: [PATCH] event timers (#326) --- collector/lib/CollectorStatsExporter.cpp | 34 ++++++++++++++++++++++++ collector/lib/Sysdig.h | 4 +++ collector/lib/SysdigService.cpp | 8 ++++++ 3 files changed, 46 insertions(+) diff --git a/collector/lib/CollectorStatsExporter.cpp b/collector/lib/CollectorStatsExporter.cpp index f3eb1e4bf3..2a2ca662ba 100644 --- a/collector/lib/CollectorStatsExporter.cpp +++ b/collector/lib/CollectorStatsExporter.cpp @@ -79,11 +79,27 @@ void CollectorStatsExporter::run() { .Help("Collector events by event type") .Register(*registry_); + auto& collectorTypedEventTimesTotal = prometheus::BuildGauge() + .Name("rox_collector_event_times_us_total") + .Help("Collector event timings (total)") + .Register(*registry_); + + auto& collectorTypedEventTimesAvg = prometheus::BuildGauge() + .Name("rox_collector_event_times_us_avg") + .Help("Collector event timings (average)") + .Register(*registry_); + struct { prometheus::Gauge* filtered = nullptr; prometheus::Gauge* userspace = nullptr; prometheus::Gauge* chiselCacheHitsAccept = nullptr; prometheus::Gauge* chiselCacheHitsReject = nullptr; + + prometheus::Gauge* parse_micros_total = nullptr; + prometheus::Gauge* process_micros_total = nullptr; + + prometheus::Gauge* parse_micros_avg = nullptr; + prometheus::Gauge* process_micros_avg = nullptr; } typed[PPM_EVENT_MAX] = {}; const auto& active_syscalls = config_->Syscalls(); @@ -107,6 +123,16 @@ void CollectorStatsExporter::run() { std::map{{"quantity", "chiselCacheHitsAccept"}, {"event_type", event_name}, {"event_dir", event_dir}}); typed[i].chiselCacheHitsReject = &collectorTypedEventCounters.Add( std::map{{"quantity", "chiselCacheHitsReject"}, {"event_type", event_name}, {"event_dir", event_dir}}); + + typed[i].parse_micros_total = &collectorTypedEventTimesTotal.Add( + std::map{{"step", "parse"}, {"event_type", event_name}, {"event_dir", event_dir}}); + typed[i].process_micros_total = &collectorTypedEventTimesTotal.Add( + std::map{{"step", "process"}, {"event_type", event_name}, {"event_dir", event_dir}}); + + typed[i].parse_micros_avg = &collectorTypedEventTimesAvg.Add( + std::map{{"step", "parse"}, {"event_type", event_name}, {"event_dir", event_dir}}); + typed[i].process_micros_avg = &collectorTypedEventTimesAvg.Add( + std::map{{"step", "process"}, {"event_type", event_name}, {"event_dir", event_dir}}); } while (thread_.Pause(std::chrono::seconds(5))) { @@ -127,6 +153,8 @@ void CollectorStatsExporter::run() { auto userspace = stats.nUserspaceEvents[i]; auto chiselCacheHitsAccept = stats.nChiselCacheHitsAccept[i]; auto chiselCacheHitsReject = stats.nChiselCacheHitsReject[i]; + auto parse_micros_total = stats.event_parse_micros[i]; + auto process_micros_total = stats.event_process_micros[i]; nFiltered += filtered; nUserspace += userspace; @@ -137,6 +165,12 @@ void CollectorStatsExporter::run() { if (counters.userspace) counters.userspace->Set(userspace); if (counters.chiselCacheHitsAccept) counters.chiselCacheHitsAccept->Set(chiselCacheHitsAccept); if (counters.chiselCacheHitsReject) counters.chiselCacheHitsReject->Set(chiselCacheHitsReject); + + if (counters.parse_micros_total) counters.parse_micros_total->Set(parse_micros_total); + if (counters.process_micros_total) counters.process_micros_total->Set(process_micros_total); + + if (counters.parse_micros_avg) counters.parse_micros_avg->Set(userspace ? parse_micros_total / userspace : 0); + if (counters.process_micros_avg) counters.process_micros_avg->Set(filtered ? process_micros_total / filtered : 0); } filtered.Set(nFiltered); diff --git a/collector/lib/Sysdig.h b/collector/lib/Sysdig.h index 41bb88074c..9c71378f07 100644 --- a/collector/lib/Sysdig.h +++ b/collector/lib/Sysdig.h @@ -57,6 +57,10 @@ struct SysdigStats { volatile uint64_t nProcessResolutionFailuresByEvt = 0; // number of process signals failed to resolve by event* volatile uint64_t nProcessResolutionFailuresByTinfo = 0; // number of process signals failed to resolve by tinfo* volatile uint64_t nProcessRateLimitCount = 0; // number of process signals rate limited + + // Timing metrics + volatile uint64_t event_parse_micros[PPM_EVENT_MAX] = {0}; // total microseconds spent parsing event type (correlates w/ nUserspaceEvents) + volatile uint64_t event_process_micros[PPM_EVENT_MAX] = {0}; // total microseconds spent processing event type (correlates w/ nFilteredevents) }; class Sysdig { diff --git a/collector/lib/SysdigService.cpp b/collector/lib/SysdigService.cpp index 219d803918..fb873f87f0 100644 --- a/collector/lib/SysdigService.cpp +++ b/collector/lib/SysdigService.cpp @@ -34,6 +34,7 @@ You should have received a copy of the GNU General Public License along with thi #include "Logging.h" #include "NetworkSignalHandler.h" #include "Utility.h" +#include "TimeUtil.h" namespace collector { @@ -123,12 +124,16 @@ bool SysdigService::FilterEvent(sinsp_evt* event) { sinsp_evt* SysdigService::GetNext() { sinsp_evt* event; + + auto parse_start = NowMicros(); auto res = inspector_->next(&event); if (res != SCAP_SUCCESS) return nullptr; if (event->get_category() & EC_INTERNAL) return nullptr; + userspace_stats_.event_parse_micros[event->get_type()] += (NowMicros() - parse_start); ++userspace_stats_.nUserspaceEvents[event->get_type()]; + if (!FilterEvent(event)) { return nullptr; } @@ -171,6 +176,7 @@ void SysdigService::Run(const std::atomic& contr sinsp_evt* evt = GetNext(); if (!evt) continue; + auto process_start = NowMicros(); for (auto& signal_handler : signal_handlers_) { if (!signal_handler.ShouldHandle(evt)) continue; auto result = signal_handler.handler->HandleSignal(evt); @@ -181,6 +187,8 @@ void SysdigService::Run(const std::atomic& contr result = signal_handler.handler->HandleSignal(evt); } } + + userspace_stats_.event_process_micros[evt->get_type()] += (NowMicros() - process_start); } }