From 706b4e4c3a385c7546046c11824d462b13056a98 Mon Sep 17 00:00:00 2001 From: Remi Gacogne Date: Tue, 20 Aug 2024 14:44:57 +0200 Subject: [PATCH] dnsdist: Stop reporting timeouts in `topSlow()`, add `topTimeouts()` Until this commit `topSlow()` returned queries that timed out, which is not very helpful. This was happening because timeouts are internally recorded with a very high response time. With this change, `topSlow()` now ignores queries that timed out, and a new command is added to look into these: `topTimeouts()`. --- pdns/dnsdistdist/dnsdist-console.cc | 3 ++- pdns/dnsdistdist/dnsdist-lua-inspection.cc | 13 ++++++++++--- pdns/dnsdistdist/docs/reference/config.rst | 13 +++++++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/pdns/dnsdistdist/dnsdist-console.cc b/pdns/dnsdistdist/dnsdist-console.cc index d1d273853511a..12926eeedf94d 100644 --- a/pdns/dnsdistdist/dnsdist-console.cc +++ b/pdns/dnsdistdist/dnsdist-console.cc @@ -844,7 +844,8 @@ static const std::vector s_consoleKeywords {"topResponseRules", true, "[top][, vars]", "show `top` response rules"}, {"topRules", true, "[top][, vars]", "show `top` rules"}, {"topSelfAnsweredResponseRules", true, "[top][, vars]", "show `top` self-answered response rules"}, - {"topSlow", true, "[top][, limit][, labels]", "show `top` queries slower than `limit` milliseconds, grouped by last `labels` labels"}, + {"topSlow", true, "[top][, limit][, labels]", "show `top` queries slower than `limit` milliseconds (timeouts excepted), grouped by last `labels` labels"}, + {"topTimeouts", true, "[top][, labels]", "show `top` queries that timed out, grouped by last `labels` labels"}, {"TrailingDataRule", true, "", "Matches if the query has trailing data"}, {"truncateTC", true, "bool", "if set (defaults to no starting with dnsdist 1.2.0) truncate TC=1 answers so they are actually empty. Fixes an issue for PowerDNS Authoritative Server 2.9.22. 
Note: turning this on breaks compatibility with RFC 6891."}, {"unregisterDynBPFFilter", true, "DynBPFFilter", "unregister this dynamic BPF filter"}, diff --git a/pdns/dnsdistdist/dnsdist-lua-inspection.cc b/pdns/dnsdistdist/dnsdist-lua-inspection.cc index f3cef29f1e4da..72efbef6c651e 100644 --- a/pdns/dnsdistdist/dnsdist-lua-inspection.cc +++ b/pdns/dnsdistdist/dnsdist-lua-inspection.cc @@ -518,11 +518,18 @@ void setupLuaInspection(LuaContext& luaCtx) luaCtx.executeCode(R"(function topResponses(top, kind, labels) top = top or 10; kind = kind or 0; for k,v in ipairs(getTopResponses(top, kind, labels)) do show(string.format("%4d %-40s %4d %4.1f%%",k,v[1],v[2],v[3])) end end)"); - luaCtx.writeFunction("getSlowResponses", [](uint64_t top, uint64_t msec, boost::optional labels) { - return getGenResponses(top, labels, [msec](const Rings::Response& resp) { return resp.usec > msec * 1000; }); + luaCtx.writeFunction("getSlowResponses", [](uint64_t top, uint64_t msec, boost::optional labels, boost::optional timeouts) { + return getGenResponses(top, labels, [msec,timeouts](const Rings::Response& resp) { + if (timeouts && *timeouts) { + return resp.usec == std::numeric_limits::max(); + } + return resp.usec > msec * 1000 && resp.usec != std::numeric_limits::max(); + }); }); - luaCtx.executeCode(R"(function topSlow(top, msec, labels) top = top or 10; msec = msec or 500; for k,v in ipairs(getSlowResponses(top, msec, labels)) do show(string.format("%4d %-40s %4d %4.1f%%",k,v[1],v[2],v[3])) end end)"); + luaCtx.executeCode(R"(function topSlow(top, msec, labels) top = top or 10; msec = msec or 500; for k,v in ipairs(getSlowResponses(top, msec, labels, false)) do show(string.format("%4d %-40s %4d %4.1f%%",k,v[1],v[2],v[3])) end end)"); + + luaCtx.executeCode(R"(function topTimeouts(top, labels) top = top or 10; for k,v in ipairs(getSlowResponses(top, 0, labels, true)) do show(string.format("%4d %-40s %4d %4.1f%%",k,v[1],v[2],v[3])) end end)"); luaCtx.writeFunction("getTopBandwidth", 
[](uint64_t top) { setLuaNoSideEffect(); diff --git a/pdns/dnsdistdist/docs/reference/config.rst b/pdns/dnsdistdist/docs/reference/config.rst index 969b45db1242e..fb3c8c9518642 100644 --- a/pdns/dnsdistdist/docs/reference/config.rst +++ b/pdns/dnsdistdist/docs/reference/config.rst @@ -1504,6 +1504,9 @@ Status, Statistics and More .. function:: topSlow([num[, limit[, labels]]]) + .. versionchanged:: 1.9.7 + queries that timed out are no longer reported by ``topSlow``, see :func:`topTimeouts` instead + Print the ``num`` slowest queries that are slower than ``limit`` milliseconds. Optionally grouped by the rightmost ``labels`` DNS labels. @@ -1511,6 +1514,16 @@ Status, Statistics and More :param int limit: Show queries slower than this amount of milliseconds, defaults to 2000 :param int label: Number of labels to cut down to +.. function:: topTimeouts([num[, labels]]) + + .. versionadded:: 1.9.7 + + Print the ``num`` queries that timed out the most. + Optionally grouped by the rightmost ``labels`` DNS labels. + + :param int num: Number to show, defaults to 10 + :param int labels: Number of labels to cut down to + .. _dynblocksref: Dynamic Blocks