From 7ed3fbb05ad87515f5a791f0971e8a23611aa14e Mon Sep 17 00:00:00 2001 From: Anton Tolchanov Date: Wed, 17 Apr 2024 15:16:40 +0200 Subject: [PATCH] Remove size limit for per-metric lookup tables Size based cleanup was added to avoid unbounded growth of lookup tables as metrics get deleted/reset. This should not be necessary now when lookup tables are being reset on metric deletion. This reverts eb1876d67533abea39f006147441c5df51ef428d --- README.md | 6 ------ prometheus.lua | 25 ++----------------------- prometheus_test.lua | 38 -------------------------------------- 3 files changed, 2 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index 5423157..de34300 100644 --- a/README.md +++ b/README.md @@ -106,12 +106,6 @@ section of nginx configuration. * `sync_interval` (number): sets the sync interval for per-worker counters and key index (in seconds). This sets the boundary on eventual consistency of counter metric increments, and metric resets/deletions. Defaults to 1. - * `lookup_max_size` (number): maximum size of a per-metric lookup table - maintained by each worker to cache full metric names. Defaults to 1000. - If you have metrics with extremely high cardinality and lots of available - RAM, you might want to increase this to avoid cache getting flushed too - often. Decreasing this makes sense if you have a very large number of - metrics or need to minimize memory usage of this library. Returns a `prometheus` object that should be used to register metrics. diff --git a/prometheus.lua b/prometheus.lua index fc1260e..5d90dc6 100644 --- a/prometheus.lua +++ b/prometheus.lua @@ -92,9 +92,6 @@ local DEFAULT_ERROR_METRIC_NAME = "nginx_metric_errors_total" -- Default value for per-worker counter sync interval (seconds). local DEFAULT_SYNC_INTERVAL = 1 --- Default max size of lookup table -local DEFAULT_LOOKUP_MAX_SIZE = 1000 - -- Default set of latency buckets, 5ms to 10s: local DEFAULT_BUCKETS = {0.005, 0.01, 0.02, 0.03, 0.05, 0.075, 0.1, 0.2, 0.3, 0.4, 0.5, 0.75, 1, 1.5, 2, 3, 4, 5, 10} @@ -383,10 +380,6 @@ local function lookup_or_create(self, label_values) self.name, self.label_count, cnt, table_to_string(label_values)) end - if self.lookup_size >= self.lookup_max_size then - self:reset_lookup() - end - local t = self.lookup if label_values then -- Don't use ipairs here to avoid inner loop generates trace first @@ -444,8 +437,6 @@ local function lookup_or_create(self, label_values) end t[LEAF_KEY] = full_name - self.lookup_size = self.lookup_size + 1 - local err = self._key_index:add(full_name, ERR_MSG_LRU_EVICTION) if err then return nil, err @@ -622,12 +613,6 @@ local function observe(self, value, label_values) end end --- Reset the metric name lookup table for a given metric. -local function reset_lookup(self) - self.lookup_size = 0 - self.lookup = {} -end - -- Delete all metrics for a given gauge, counter or a histogram. -- -- This is like `del`, but will delete all time series for all previously @@ -696,7 +681,7 @@ local function reset(self) end -- Clean up the full metric name lookup table as well. - self:reset_lookup() + self.lookup = {} end -- Initialize the module. @@ -733,13 +718,10 @@ function Prometheus.init(dict_name, options_or_prefix) DEFAULT_ERROR_METRIC_NAME self.sync_interval = options_or_prefix.sync_interval or DEFAULT_SYNC_INTERVAL - self.lookup_max_size = options_or_prefix.lookup_max_size or - DEFAULT_LOOKUP_MAX_SIZE else self.prefix = options_or_prefix or '' self.error_metric_name = DEFAULT_ERROR_METRIC_NAME self.sync_interval = DEFAULT_SYNC_INTERVAL - self.lookup_max_size = DEFAULT_LOOKUP_MAX_SIZE end self.registry = {} @@ -756,7 +738,7 @@ function Prometheus.init(dict_name, options_or_prefix) metric_name = ngx_re_gsub(metric_name, "_sum$", "", "jo") local m = self.registry[metric_name] if m and m.typ == TYPE_HISTOGRAM then - m:reset_lookup() + m.lookup = {} end end) @@ -866,9 +848,6 @@ local function register(self, name, help, label_names, buckets, typ) -- ['my.net']['200'][LEAF_KEY] = 'http_count{host="my.net",status="200"}' -- ['my.net']['500'][LEAF_KEY] = 'http_count{host="my.net",status="500"}' lookup = {}, - lookup_size = 0, - lookup_max_size = self.lookup_max_size, - reset_lookup = reset_lookup, parent = self, -- Store a reference for logging functions for faster lookup. _log_error = function(...) self:log_error(...) end, diff --git a/prometheus_test.lua b/prometheus_test.lua index ba98b1b..2eb19b9 100644 --- a/prometheus_test.lua +++ b/prometheus_test.lua @@ -849,44 +849,6 @@ function TestKeyIndex:testSync() luaunit.assertEquals(self.last_deleted_key, "key2") end -function TestPrometheus:testLookupMaxSize() - local c = self.p:counter("testc", "Counter", {"tag"}) - local g = self.p:gauge("testg", "Gauge", {"tag"}) - local h = self.p:histogram("testh", "Histogram", {"tag"}) - - luaunit.assertEquals(c.lookup_size, 0) - luaunit.assertEquals(g.lookup_size, 0) - luaunit.assertEquals(h.lookup_size, 0) - luaunit.assertEquals(c.lookup_max_size, 1000) - luaunit.assertEquals(g.lookup_max_size, 1000) - luaunit.assertEquals(h.lookup_max_size, 1000) - - for _, max_size in ipairs({10, 17, 101}) do - local p1 = require('prometheus').init("metrics", - {lookup_max_size=max_size}) - local c1 = p1:counter("testc1", "Counter", {"tag"}) - local g1 = p1:gauge("testg1", "Gauge", {"tag"}) - local h1 = p1:histogram("testh1", "Histogram", {"tag"}) - - luaunit.assertEquals(p1.lookup_max_size, max_size) - luaunit.assertEquals(c1.lookup_max_size, max_size) - luaunit.assertEquals(g1.lookup_max_size, max_size) - luaunit.assertEquals(h1.lookup_max_size, max_size) - - for i = 0, max_size * 2.1, 1 do - c1:inc(1, {tostring(i)}) - g1:set(3, {tostring(i)}) - h1:observe(50, {tostring(i)}) - - luaunit.assertEquals(c1.lookup_size, (i % max_size)+1) - luaunit.assertEquals(g1.lookup_size, (i % max_size)+1) - luaunit.assertEquals(h1.lookup_size, (i % max_size)+1) - end - end - - luaunit.assertEquals(self.dict:get("nginx_metric_errors_total"), 0) -end - function TestPrometheus.testPrintfTable() local p = require('prometheus') luaunit.assertEquals(p._table_to_string(nil), "nil")