From 84e11c856263f9cb1516b1d958ff7652463885ae Mon Sep 17 00:00:00 2001 From: Mikhail Alpinskiy Date: Wed, 27 Nov 2024 17:30:52 +0300 Subject: [PATCH] Query duration TOP --- cmd/statshouse-api/statshouse-api.go | 3 +- internal/api/handler.go | 102 ++++++++++++++++++++++----- internal/api/http_router.go | 47 ++++++++++-- internal/api/promql.go | 2 +- 4 files changed, 127 insertions(+), 27 deletions(-) diff --git a/cmd/statshouse-api/statshouse-api.go b/cmd/statshouse-api/statshouse-api.go index 7ee3828ea..456cd8501 100644 --- a/cmd/statshouse-api/statshouse-api.go +++ b/cmd/statshouse-api/statshouse-api.go @@ -406,7 +406,8 @@ func run(argv args, cfg *api.Config, vkuthPublicKeys map[string][]byte) error { m.Path("/debug/pprof/trace").Methods("GET").HandlerFunc(api.HandleProfTrace) m.Path("/debug/pprof/symbol").Methods("GET").HandlerFunc(api.HandleProfSymbol) m.Path("/debug/500").Methods("GET").HandlerFunc(api.DumpInternalServerErrors) - m.Path("/debug/top").Methods("GET").HandlerFunc(api.DumpQueryTop) + m.Path("/debug/top/mem").Methods("GET").HandlerFunc(api.DumpQueryTopMemUsage) + m.Path("/debug/top/time").Methods("GET").HandlerFunc(api.DumpQueryTopDuration) m.Path("/debug/tag/draft").Methods("GET").HandlerFunc(api.HandleTagDraftList) m.Router.PathPrefix("/").Methods("GET", "HEAD").HandlerFunc(f.HandleStatic) diff --git a/internal/api/handler.go b/internal/api/handler.go index 019b86b57..1b88548a8 100644 --- a/internal/api/handler.go +++ b/internal/api/handler.go @@ -211,13 +211,22 @@ type ( errorX int // TOP queries by memory usage - queryTop []queryInfo - queryTopMu sync.Mutex + queryTopMemUsage []queryTopMemUsage + queryTopMemUsageMu sync.Mutex + queryTopDuration []queryTopDuration + queryTopDurationMu sync.Mutex } - queryInfo struct { + queryTopMemUsage struct { queryArgs - queryStatistics + queryMemUsage + protocol int + user string + } + + queryTopDuration struct { + queryArgs + duration time.Duration protocol int user string } @@ -228,7 +237,7 @@ type ( end int64 } - queryStatistics struct { + queryMemUsage struct { rowCount int colCount int memUsage int @@ -2993,6 +3002,7 @@ func loadPoints(ctx context.Context, h *requestHandler, pq *pointsQuery, lod dat return nil }}) duration := time.Since(start) + h.reportQueryDuration(duration) if err != nil { return 0, err } @@ -3380,14 +3390,14 @@ func (h *requestHandler) init(accessToken, version string) (err error) { return nil } -func (h *requestHandler) reportQueryDataSize(rowCount, colCount int) { +func (h *requestHandler) reportQueryMemUsage(rowCount, colCount int) { memUsage := 8 * rowCount * colCount if memUsage <= 0 { return } - h.queryTopMu.Lock() - defer h.queryTopMu.Unlock() - s := h.queryTop + h.queryTopMemUsageMu.Lock() + defer h.queryTopMemUsageMu.Unlock() + s := h.queryTopMemUsage i := len(s) for ; i > 0 && s[i-1].memUsage < memUsage; i-- { // pass @@ -3397,41 +3407,84 @@ func (h *requestHandler) reportQueryDataSize(rowCount, colCount int) { switch i { case 0: if len(s) == 0 { - s = make([]queryInfo, 0, maxLen+1) - s = append(s, h.getQueryInfo(rowCount, colCount, memUsage)) + s = make([]queryTopMemUsage, 0, maxLen+1) + s = append(s, h.queryMemUsage(rowCount, colCount, memUsage)) + } else { + s = append(s[:1], s...) + if len(s) > maxLen { + s = s[:maxLen] + } + s[0] = h.queryMemUsage(rowCount, colCount, memUsage) + } + top = true + case len(s): + if len(s) < maxLen && s[len(s)-1].expr != h.query.Expr { + s = append(s, h.queryMemUsage(rowCount, colCount, memUsage)) + top = true + } + default: + if s[i-1].expr != h.query.Expr { + s = append(s[:i+1], s[i+1:]...) + s[i] = h.queryMemUsage(rowCount, colCount, memUsage) + top = true + } + } + if top { + h.queryTopMemUsage = s + } +} + +func (h *requestHandler) reportQueryDuration(d time.Duration) { + if d <= 0 { + return + } + h.queryTopDurationMu.Lock() + defer h.queryTopDurationMu.Unlock() + s := h.queryTopDuration + i := len(s) + for ; i > 0 && s[i-1].duration < d; i-- { + // pass + } + var top bool + const maxLen = 100 + switch i { + case 0: + if len(s) == 0 { + s = make([]queryTopDuration, 0, maxLen+1) + s = append(s, h.queryDuration(d)) } else { s = append(s[:1], s...) if len(s) > maxLen { s = s[:maxLen] } - s[0] = h.getQueryInfo(rowCount, colCount, memUsage) + s[0] = h.queryDuration(d) } top = true case len(s): if len(s) < maxLen && s[len(s)-1].expr != h.query.Expr { - s = append(s, h.getQueryInfo(rowCount, colCount, memUsage)) + s = append(s, h.queryDuration(d)) top = true } default: if s[i-1].expr != h.query.Expr { s = append(s[:i+1], s[i+1:]...) - s[i] = h.getQueryInfo(rowCount, colCount, memUsage) + s[i] = h.queryDuration(d) top = true } } if top { - h.queryTop = s + h.queryTopDuration = s } } -func (h *requestHandler) getQueryInfo(rowCount, colCount, memUsage int) queryInfo { - return queryInfo{ +func (h *requestHandler) queryMemUsage(rowCount, colCount, memUsage int) queryTopMemUsage { + return queryTopMemUsage{ queryArgs: queryArgs{ expr: h.query.Expr, start: h.query.Start, end: h.query.End, }, - queryStatistics: queryStatistics{ + queryMemUsage: queryMemUsage{ rowCount: rowCount, colCount: colCount, memUsage: memUsage, @@ -3441,6 +3494,19 @@ func (h *requestHandler) getQueryInfo(rowCount, colCount, memUsage int) queryInf } } +func (h *requestHandler) queryDuration(d time.Duration) queryTopDuration { + return queryTopDuration{ + queryArgs: queryArgs{ + expr: h.query.Expr, + start: h.query.Start, + end: h.query.End, + }, + duration: d, + protocol: h.endpointStat.protocol, + user: h.endpointStat.user, + } +} + func HandleTagDraftList(r *httpRequestHandler) { m := make(map[string][]string) for _, metric := range r.metricsStorage.GetMetaMetricList(false) { diff --git a/internal/api/http_router.go b/internal/api/http_router.go index 753ca4d4d..bbbdd67fa 100644 --- a/internal/api/http_router.go +++ b/internal/api/http_router.go @@ -165,22 +165,22 @@ func DumpInternalServerErrors(r *httpRequestHandler) { } } -func DumpQueryTop(r *httpRequestHandler) { +func DumpQueryTopMemUsage(r *httpRequestHandler) { w := r.Response() if ok := r.accessInfo.insecureMode || r.accessInfo.bitAdmin; !ok { w.WriteHeader(http.StatusForbidden) return } w.Header().Set("Content-Type", "text/plain; charset=utf-8") - var s []queryInfo - r.queryTopMu.Lock() + var s []queryTopMemUsage + r.queryTopMemUsageMu.Lock() if r.FormValue("reset") != "" { - r.queryTop, s = s, r.queryTop + r.queryTopMemUsage, s = s, r.queryTopMemUsage } else { - s = make([]queryInfo, 0, len(r.queryTop)) - s = append(s, r.queryTop...) + s = make([]queryTopMemUsage, 0, len(r.queryTopMemUsage)) + s = append(s, r.queryTopMemUsage...) } - r.queryTopMu.Unlock() + r.queryTopMemUsageMu.Unlock() for _, v := range s { var protocol string switch v.protocol { @@ -198,6 +198,39 @@ func DumpQueryTop(r *httpRequestHandler) { } } +func DumpQueryTopDuration(r *httpRequestHandler) { + w := r.Response() + if ok := r.accessInfo.insecureMode || r.accessInfo.bitAdmin; !ok { + w.WriteHeader(http.StatusForbidden) + return + } + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + var s []queryTopDuration + r.queryTopDurationMu.Lock() + if r.FormValue("reset") != "" { + r.queryTopDuration, s = s, r.queryTopDuration + } else { + s = make([]queryTopDuration, 0, len(r.queryTopDuration)) + s = append(s, r.queryTopDuration...) + } + r.queryTopDurationMu.Unlock() + for _, v := range s { + var protocol string + switch v.protocol { + case format.TagValueIDRPC: + protocol = "RPC" + case format.TagValueIDHTTP: + protocol = "HTTP" + default: + protocol = strconv.Itoa(v.protocol) + } + w.Write([]byte(v.expr)) + w.Write([]byte(fmt.Sprintf( + "\n# duration=%v token=%s proto=%s\n\n", + v.duration, v.user, protocol))) + } +} + func (r *httpRequestHandler) Response() http.ResponseWriter { return &r.w } diff --git a/internal/api/promql.go b/internal/api/promql.go index fd87c9fd0..a02fada20 100644 --- a/internal/api/promql.go +++ b/internal/api/promql.go @@ -641,7 +641,7 @@ func (h *requestHandler) QuerySeries(ctx context.Context, qry *promql.SeriesQuer tagX = make(map[tsTags]int, len(tagX)) } res.Meta.Total = len(res.Data) - h.reportQueryDataSize(len(res.Data), len(qry.Timescale.Time)) + h.reportQueryMemUsage(len(res.Data), len(qry.Timescale.Time)) succeeded = true // prevents deffered "cleanup" return res, cleanup, nil }