From 8b149b085c4702b23d1adeae34cc9ebfb4ac3e2b Mon Sep 17 00:00:00 2001 From: Alexander Marshalov Date: Mon, 8 Sep 2025 22:47:06 +0200 Subject: [PATCH 01/26] implemented jaeger dependencies graph API (`/select/jaeger/api/dependencies`) --- app/vtselect/traces/jaeger/jaeger.go | 91 ++- app/vtselect/traces/jaeger/jaeger.qtpl | 25 + app/vtselect/traces/jaeger/jaeger.qtpl.go | 624 ++++++++++++--------- app/vtselect/traces/jaeger/model.go | 6 + app/vtselect/traces/query/query.go | 89 +++ docs/victoriatraces/README.md | 2 + docs/victoriatraces/changelog/CHANGELOG.md | 1 + docs/victoriatraces/querying/README.md | 1 + 8 files changed, 574 insertions(+), 265 deletions(-) diff --git a/app/vtselect/traces/jaeger/jaeger.go b/app/vtselect/traces/jaeger/jaeger.go index fe0a3d900..98833f181 100644 --- a/app/vtselect/traces/jaeger/jaeger.go +++ b/app/vtselect/traces/jaeger/jaeger.go @@ -73,8 +73,7 @@ func RequestHandler(ctx context.Context, w http.ResponseWriter, r *http.Request) return true } else if path == "/select/jaeger/api/dependencies" { jaegerDependenciesRequests.Inc() - // todo it require additional component to calculate the dependency graph. not implemented yet. - httpserver.Errorf(w, r, "/api/dependencies API is not supported yet.") + processGetDependenciesRequest(ctx, w, r) jaegerDependenciesDuration.UpdateDuration(startTime) return true } @@ -401,3 +400,91 @@ func hashProcess(process process) uint64 { hashpool.Put(d) return h } + +// processGetDependenciesRequest handle the Jaeger /api/dependencies API request. 
+//
+// It resolves the common and the dependencies-specific query params, fetches the
+// aggregated dependency rows and writes them as a Jaeger-compatible JSON response.
+// Any failure is reported to the client via httpserver.Errorf.
+func processGetDependenciesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
+	cp, err := query.GetCommonParams(r)
+	if err != nil {
+		httpserver.Errorf(w, r, "incorrect query params: %s", err)
+		return
+	}
+
+	param, err := parseJaegerDependenciesQueryParam(ctx, r)
+	if err != nil {
+		httpserver.Errorf(w, r, "incorrect dependencies query params: %s", err)
+		return
+	}
+
+	rows, err := query.GetDependencyList(ctx, cp, param)
+	if err != nil {
+		httpserver.Errorf(w, r, "get dependencies error: %s", err)
+		return
+	}
+
+	// No special case is needed for an empty result: zero rows produce an empty
+	// slice, which is rendered exactly like nil (the response is driven by len()).
+	dependencies := make([]*dependencyLink, 0, len(rows))
+	for _, row := range rows {
+		dependency := &dependencyLink{}
+		for _, f := range row.Fields {
+			switch f.Name {
+			case "parent":
+				dependency.parent = f.Value
+			case "child":
+				dependency.child = f.Value
+			case "callCount":
+				// Assign only after a successful parse: on a range error
+				// strconv.ParseUint returns the maximum uint64 together with the
+				// error, which would otherwise pass the callCount > 0 filter below.
+				callCount, err := strconv.ParseUint(f.Value, 10, 64)
+				if err != nil {
+					logger.Errorf("cannot parse callCount [%s]: %s", f.Value, err)
+					continue
+				}
+				dependency.callCount = callCount
+			}
+		}
+		// Skip incomplete links: both endpoints and a positive call count are required.
+		if dependency.parent != "" && dependency.child != "" && dependency.callCount > 0 {
+			dependencies = append(dependencies, dependency)
+		}
+	}
+
+	// Write results
+	w.Header().Set("Content-Type", "application/json")
+	WriteGetDependenciesResponse(w, dependencies)
+}
+
+// parseJaegerDependenciesQueryParam parses a Jaeger request into the unified DependenciesQueryParameters.
+//
+// Supported query args:
+//   - endTs: end of the time range in Unix milliseconds; defaults to the current time.
+//   - lookback: size of the window, either a bare non-negative integer (interpreted
+//     as milliseconds) or a Go duration string such as "6h"; defaults to 24 hours.
+//
+// It returns an error if either arg cannot be parsed or if lookback is negative.
+func parseJaegerDependenciesQueryParam(_ context.Context, r *http.Request) (*query.DependenciesQueryParameters, error) {
+	// default params
+	p := &query.DependenciesQueryParameters{
+		EndTs:    time.Now(),
+		Lookback: time.Hour * 24,
+	}
+	q := r.URL.Query()
+
+	if endTs := q.Get("endTs"); endTs != "" {
+		unixMilli, err := strconv.ParseInt(endTs, 10, 64)
+		if err != nil {
+			return nil, fmt.Errorf("cannot parse endTs [%s]: %w", endTs, err)
+		}
+		p.EndTs = time.UnixMilli(unixMilli)
+	}
+
+	if lookback := q.Get("lookback"); lookback != "" {
+		// Jaeger sends lookback as a plain number of milliseconds; give it a unit
+		// so that time.ParseDuration accepts it. The original value is kept for
+		// error messages so the client sees exactly what it sent.
+		durationStr := lookback
+		if strings.TrimLeft(lookback, "0123456789") == "" {
+			durationStr += "ms"
+		}
+		d, err := time.ParseDuration(durationStr)
+		if err != nil {
+			return nil, fmt.Errorf("cannot parse lookback [%s]: %w", lookback, err)
+		}
+		// time.ParseDuration accepts signed values (e.g. "-1h"); a negative
+		// lookback would place the start of the window after its end.
+		if d < 0 {
+			return nil, fmt.Errorf("lookback [%s] must not be negative", lookback)
+		}
+		p.Lookback = d
+	}
+
+	return p, nil
+}
diff --git a/app/vtselect/traces/jaeger/jaeger.qtpl b/app/vtselect/traces/jaeger/jaeger.qtpl index ab129dbe4..d816c3cdc 100644 --- a/app/vtselect/traces/jaeger/jaeger.qtpl +++ b/app/vtselect/traces/jaeger/jaeger.qtpl @@ -63,6 +63,31 @@ } {% endfunc %} +{% func GetDependenciesResponse(dependencies []*dependencyLink) %} +{ + "data":[ + {% if len(dependencies) > 0 %} + {%= dependencyJson(dependencies[0]) %} + {% for _, dependency := range dependencies[1:] %} + ,{%= dependencyJson(dependency) %} + {% endfor %} + {% endif %} + ], + "errors": null, + "limit": 0, + "offset": 0, + "total": {%d= len(dependencies) %} +} +{% endfunc %} + +{% func dependencyJson(dependency *dependencyLink) %} +{ + "parent": {%q= dependency.parent %}, + "child": {%q= dependency.child %}, + "callCount": {%dul= dependency.callCount %} +} +{% endfunc %} + {% func traceJson(trace *trace) %} { "processes": { diff --git a/app/vtselect/traces/jaeger/jaeger.qtpl.go b/app/vtselect/traces/jaeger/jaeger.qtpl.go index 51318059c..e15475b8f 100644 --- a/app/vtselect/traces/jaeger/jaeger.qtpl.go +++ b/app/vtselect/traces/jaeger/jaeger.qtpl.go @@ -1,570 +1,668 @@ // Code generated by qtc from "jaeger.qtpl". 
DO NOT EDIT. // See https://github.com/valyala/quicktemplate for details. -//line app/vtselect/traces/jaeger/jaeger.qtpl:1 +//line jaeger.qtpl:1 package jaeger -//line app/vtselect/traces/jaeger/jaeger.qtpl:1 +//line jaeger.qtpl:1 import ( "sort" ) -//line app/vtselect/traces/jaeger/jaeger.qtpl:7 +//line jaeger.qtpl:7 import ( qtio422016 "io" qt422016 "github.com/valyala/quicktemplate" ) -//line app/vtselect/traces/jaeger/jaeger.qtpl:7 +//line jaeger.qtpl:7 var ( _ = qtio422016.Copy _ = qt422016.AcquireByteBuffer ) -//line app/vtselect/traces/jaeger/jaeger.qtpl:7 +//line jaeger.qtpl:7 func StreamGetServicesResponse(qw422016 *qt422016.Writer, serviceList []string) { -//line app/vtselect/traces/jaeger/jaeger.qtpl:7 +//line jaeger.qtpl:7 qw422016.N().S(`{`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:10 +//line jaeger.qtpl:10 sort.Slice(serviceList, func(i, j int) bool { return serviceList[i] < serviceList[j] }) -//line app/vtselect/traces/jaeger/jaeger.qtpl:11 +//line jaeger.qtpl:11 qw422016.N().S(`"data":[`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:13 +//line jaeger.qtpl:13 if len(serviceList) > 0 { -//line app/vtselect/traces/jaeger/jaeger.qtpl:14 +//line jaeger.qtpl:14 qw422016.N().Q(serviceList[0]) -//line app/vtselect/traces/jaeger/jaeger.qtpl:15 +//line jaeger.qtpl:15 for _, service := range serviceList[1:] { -//line app/vtselect/traces/jaeger/jaeger.qtpl:15 +//line jaeger.qtpl:15 qw422016.N().S(`,`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:16 +//line jaeger.qtpl:16 qw422016.N().Q(service) -//line app/vtselect/traces/jaeger/jaeger.qtpl:17 +//line jaeger.qtpl:17 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:18 +//line jaeger.qtpl:18 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:18 +//line jaeger.qtpl:18 qw422016.N().S(`],"errors": null,"limit": 0,"offset": 0,"total":`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:23 +//line jaeger.qtpl:23 qw422016.N().D(len(serviceList)) -//line app/vtselect/traces/jaeger/jaeger.qtpl:23 +//line 
jaeger.qtpl:23 qw422016.N().S(`}`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:25 +//line jaeger.qtpl:25 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:25 +//line jaeger.qtpl:25 func WriteGetServicesResponse(qq422016 qtio422016.Writer, serviceList []string) { -//line app/vtselect/traces/jaeger/jaeger.qtpl:25 +//line jaeger.qtpl:25 qw422016 := qt422016.AcquireWriter(qq422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:25 +//line jaeger.qtpl:25 StreamGetServicesResponse(qw422016, serviceList) -//line app/vtselect/traces/jaeger/jaeger.qtpl:25 +//line jaeger.qtpl:25 qt422016.ReleaseWriter(qw422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:25 +//line jaeger.qtpl:25 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:25 +//line jaeger.qtpl:25 func GetServicesResponse(serviceList []string) string { -//line app/vtselect/traces/jaeger/jaeger.qtpl:25 +//line jaeger.qtpl:25 qb422016 := qt422016.AcquireByteBuffer() -//line app/vtselect/traces/jaeger/jaeger.qtpl:25 +//line jaeger.qtpl:25 WriteGetServicesResponse(qb422016, serviceList) -//line app/vtselect/traces/jaeger/jaeger.qtpl:25 +//line jaeger.qtpl:25 qs422016 := string(qb422016.B) -//line app/vtselect/traces/jaeger/jaeger.qtpl:25 +//line jaeger.qtpl:25 qt422016.ReleaseByteBuffer(qb422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:25 +//line jaeger.qtpl:25 return qs422016 -//line app/vtselect/traces/jaeger/jaeger.qtpl:25 +//line jaeger.qtpl:25 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:27 +//line jaeger.qtpl:27 func StreamGetOperationsResponse(qw422016 *qt422016.Writer, operationList []string) { -//line app/vtselect/traces/jaeger/jaeger.qtpl:27 +//line jaeger.qtpl:27 qw422016.N().S(`{`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:30 +//line jaeger.qtpl:30 sort.Slice(operationList, func(i, j int) bool { return operationList[i] < operationList[j] }) -//line app/vtselect/traces/jaeger/jaeger.qtpl:31 +//line jaeger.qtpl:31 qw422016.N().S(`"data":[`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:33 
+//line jaeger.qtpl:33 if len(operationList) > 0 { -//line app/vtselect/traces/jaeger/jaeger.qtpl:34 +//line jaeger.qtpl:34 qw422016.N().Q(operationList[0]) -//line app/vtselect/traces/jaeger/jaeger.qtpl:35 +//line jaeger.qtpl:35 for _, operation := range operationList[1:] { -//line app/vtselect/traces/jaeger/jaeger.qtpl:35 +//line jaeger.qtpl:35 qw422016.N().S(`,`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:36 +//line jaeger.qtpl:36 qw422016.N().Q(operation) -//line app/vtselect/traces/jaeger/jaeger.qtpl:37 +//line jaeger.qtpl:37 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:38 +//line jaeger.qtpl:38 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:38 +//line jaeger.qtpl:38 qw422016.N().S(`],"errors": null,"limit": 0,"offset": 0,"total":`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:43 +//line jaeger.qtpl:43 qw422016.N().D(len(operationList)) -//line app/vtselect/traces/jaeger/jaeger.qtpl:43 +//line jaeger.qtpl:43 qw422016.N().S(`}`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:45 +//line jaeger.qtpl:45 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:45 +//line jaeger.qtpl:45 func WriteGetOperationsResponse(qq422016 qtio422016.Writer, operationList []string) { -//line app/vtselect/traces/jaeger/jaeger.qtpl:45 +//line jaeger.qtpl:45 qw422016 := qt422016.AcquireWriter(qq422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:45 +//line jaeger.qtpl:45 StreamGetOperationsResponse(qw422016, operationList) -//line app/vtselect/traces/jaeger/jaeger.qtpl:45 +//line jaeger.qtpl:45 qt422016.ReleaseWriter(qw422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:45 +//line jaeger.qtpl:45 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:45 +//line jaeger.qtpl:45 func GetOperationsResponse(operationList []string) string { -//line app/vtselect/traces/jaeger/jaeger.qtpl:45 +//line jaeger.qtpl:45 qb422016 := qt422016.AcquireByteBuffer() -//line app/vtselect/traces/jaeger/jaeger.qtpl:45 +//line jaeger.qtpl:45 WriteGetOperationsResponse(qb422016, operationList) -//line 
app/vtselect/traces/jaeger/jaeger.qtpl:45 +//line jaeger.qtpl:45 qs422016 := string(qb422016.B) -//line app/vtselect/traces/jaeger/jaeger.qtpl:45 +//line jaeger.qtpl:45 qt422016.ReleaseByteBuffer(qb422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:45 +//line jaeger.qtpl:45 return qs422016 -//line app/vtselect/traces/jaeger/jaeger.qtpl:45 +//line jaeger.qtpl:45 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:47 +//line jaeger.qtpl:47 func StreamGetTracesResponse(qw422016 *qt422016.Writer, traces []*trace) { -//line app/vtselect/traces/jaeger/jaeger.qtpl:47 +//line jaeger.qtpl:47 qw422016.N().S(`{"data":[`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:50 +//line jaeger.qtpl:50 if len(traces) > 0 && len(traces[0].spans) > 0 { -//line app/vtselect/traces/jaeger/jaeger.qtpl:51 +//line jaeger.qtpl:51 streamtraceJson(qw422016, traces[0]) -//line app/vtselect/traces/jaeger/jaeger.qtpl:52 +//line jaeger.qtpl:52 for _, trace := range traces[1:] { -//line app/vtselect/traces/jaeger/jaeger.qtpl:53 +//line jaeger.qtpl:53 if len(trace.spans) > 0 { -//line app/vtselect/traces/jaeger/jaeger.qtpl:53 +//line jaeger.qtpl:53 qw422016.N().S(`,`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:54 +//line jaeger.qtpl:54 streamtraceJson(qw422016, trace) -//line app/vtselect/traces/jaeger/jaeger.qtpl:55 +//line jaeger.qtpl:55 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:56 +//line jaeger.qtpl:56 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:57 +//line jaeger.qtpl:57 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:57 +//line jaeger.qtpl:57 qw422016.N().S(`],"errors": null,"limit": 0,"offset": 0,"total":`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:62 +//line jaeger.qtpl:62 qw422016.N().D(len(traces)) -//line app/vtselect/traces/jaeger/jaeger.qtpl:62 +//line jaeger.qtpl:62 qw422016.N().S(`}`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:64 +//line jaeger.qtpl:64 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:64 +//line jaeger.qtpl:64 func 
WriteGetTracesResponse(qq422016 qtio422016.Writer, traces []*trace) { -//line app/vtselect/traces/jaeger/jaeger.qtpl:64 +//line jaeger.qtpl:64 qw422016 := qt422016.AcquireWriter(qq422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:64 +//line jaeger.qtpl:64 StreamGetTracesResponse(qw422016, traces) -//line app/vtselect/traces/jaeger/jaeger.qtpl:64 +//line jaeger.qtpl:64 qt422016.ReleaseWriter(qw422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:64 +//line jaeger.qtpl:64 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:64 +//line jaeger.qtpl:64 func GetTracesResponse(traces []*trace) string { -//line app/vtselect/traces/jaeger/jaeger.qtpl:64 +//line jaeger.qtpl:64 qb422016 := qt422016.AcquireByteBuffer() -//line app/vtselect/traces/jaeger/jaeger.qtpl:64 +//line jaeger.qtpl:64 WriteGetTracesResponse(qb422016, traces) -//line app/vtselect/traces/jaeger/jaeger.qtpl:64 +//line jaeger.qtpl:64 qs422016 := string(qb422016.B) -//line app/vtselect/traces/jaeger/jaeger.qtpl:64 +//line jaeger.qtpl:64 qt422016.ReleaseByteBuffer(qb422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:64 +//line jaeger.qtpl:64 return qs422016 -//line app/vtselect/traces/jaeger/jaeger.qtpl:64 +//line jaeger.qtpl:64 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:66 +//line jaeger.qtpl:66 +func StreamGetDependenciesResponse(qw422016 *qt422016.Writer, dependencies []*dependencyLink) { +//line jaeger.qtpl:66 + qw422016.N().S(`{"data":[`) +//line jaeger.qtpl:69 + if len(dependencies) > 0 { +//line jaeger.qtpl:70 + streamdependencyJson(qw422016, dependencies[0]) +//line jaeger.qtpl:71 + for _, dependency := range dependencies[1:] { +//line jaeger.qtpl:71 + qw422016.N().S(`,`) +//line jaeger.qtpl:72 + streamdependencyJson(qw422016, dependency) +//line jaeger.qtpl:73 + } +//line jaeger.qtpl:74 + } +//line jaeger.qtpl:74 + qw422016.N().S(`],"errors": null,"limit": 0,"offset": 0,"total":`) +//line jaeger.qtpl:79 + qw422016.N().D(len(dependencies)) +//line jaeger.qtpl:79 + qw422016.N().S(`}`) 
+//line jaeger.qtpl:81 +} + +//line jaeger.qtpl:81 +func WriteGetDependenciesResponse(qq422016 qtio422016.Writer, dependencies []*dependencyLink) { +//line jaeger.qtpl:81 + qw422016 := qt422016.AcquireWriter(qq422016) +//line jaeger.qtpl:81 + StreamGetDependenciesResponse(qw422016, dependencies) +//line jaeger.qtpl:81 + qt422016.ReleaseWriter(qw422016) +//line jaeger.qtpl:81 +} + +//line jaeger.qtpl:81 +func GetDependenciesResponse(dependencies []*dependencyLink) string { +//line jaeger.qtpl:81 + qb422016 := qt422016.AcquireByteBuffer() +//line jaeger.qtpl:81 + WriteGetDependenciesResponse(qb422016, dependencies) +//line jaeger.qtpl:81 + qs422016 := string(qb422016.B) +//line jaeger.qtpl:81 + qt422016.ReleaseByteBuffer(qb422016) +//line jaeger.qtpl:81 + return qs422016 +//line jaeger.qtpl:81 +} + +//line jaeger.qtpl:83 +func streamdependencyJson(qw422016 *qt422016.Writer, dependency *dependencyLink) { +//line jaeger.qtpl:83 + qw422016.N().S(`{"parent":`) +//line jaeger.qtpl:85 + qw422016.N().Q(dependency.parent) +//line jaeger.qtpl:85 + qw422016.N().S(`,"child":`) +//line jaeger.qtpl:86 + qw422016.N().Q(dependency.child) +//line jaeger.qtpl:86 + qw422016.N().S(`,"callCount":`) +//line jaeger.qtpl:87 + qw422016.N().DUL(dependency.callCount) +//line jaeger.qtpl:87 + qw422016.N().S(`}`) +//line jaeger.qtpl:89 +} + +//line jaeger.qtpl:89 +func writedependencyJson(qq422016 qtio422016.Writer, dependency *dependencyLink) { +//line jaeger.qtpl:89 + qw422016 := qt422016.AcquireWriter(qq422016) +//line jaeger.qtpl:89 + streamdependencyJson(qw422016, dependency) +//line jaeger.qtpl:89 + qt422016.ReleaseWriter(qw422016) +//line jaeger.qtpl:89 +} + +//line jaeger.qtpl:89 +func dependencyJson(dependency *dependencyLink) string { +//line jaeger.qtpl:89 + qb422016 := qt422016.AcquireByteBuffer() +//line jaeger.qtpl:89 + writedependencyJson(qb422016, dependency) +//line jaeger.qtpl:89 + qs422016 := string(qb422016.B) +//line jaeger.qtpl:89 + qt422016.ReleaseByteBuffer(qb422016) 
+//line jaeger.qtpl:89 + return qs422016 +//line jaeger.qtpl:89 +} + +//line jaeger.qtpl:91 func streamtraceJson(qw422016 *qt422016.Writer, trace *trace) { -//line app/vtselect/traces/jaeger/jaeger.qtpl:66 +//line jaeger.qtpl:91 qw422016.N().S(`{"processes": {`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:69 +//line jaeger.qtpl:94 if len(trace.processMap) > 0 { -//line app/vtselect/traces/jaeger/jaeger.qtpl:70 +//line jaeger.qtpl:95 qw422016.N().Q(trace.processMap[0].processID) -//line app/vtselect/traces/jaeger/jaeger.qtpl:70 +//line jaeger.qtpl:95 qw422016.N().S(`:`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:70 +//line jaeger.qtpl:95 streamprocessJson(qw422016, trace.processMap[0].process) -//line app/vtselect/traces/jaeger/jaeger.qtpl:71 +//line jaeger.qtpl:96 for _, v := range trace.processMap[1:] { -//line app/vtselect/traces/jaeger/jaeger.qtpl:71 +//line jaeger.qtpl:96 qw422016.N().S(`,`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:72 +//line jaeger.qtpl:97 qw422016.N().Q(v.processID) -//line app/vtselect/traces/jaeger/jaeger.qtpl:72 +//line jaeger.qtpl:97 qw422016.N().S(`:`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:72 +//line jaeger.qtpl:97 streamprocessJson(qw422016, v.process) -//line app/vtselect/traces/jaeger/jaeger.qtpl:73 +//line jaeger.qtpl:98 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:74 +//line jaeger.qtpl:99 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:74 +//line jaeger.qtpl:99 qw422016.N().S(`},"spans": [`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:77 +//line jaeger.qtpl:102 if len(trace.spans) > 0 { -//line app/vtselect/traces/jaeger/jaeger.qtpl:78 +//line jaeger.qtpl:103 streamspanJson(qw422016, trace.spans[0]) -//line app/vtselect/traces/jaeger/jaeger.qtpl:79 +//line jaeger.qtpl:104 for _, v := range trace.spans[1:] { -//line app/vtselect/traces/jaeger/jaeger.qtpl:79 +//line jaeger.qtpl:104 qw422016.N().S(`,`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:80 +//line jaeger.qtpl:105 streamspanJson(qw422016, v) 
-//line app/vtselect/traces/jaeger/jaeger.qtpl:81 +//line jaeger.qtpl:106 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:82 +//line jaeger.qtpl:107 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:82 +//line jaeger.qtpl:107 qw422016.N().S(`],"traceID":`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:84 +//line jaeger.qtpl:109 qw422016.N().Q(trace.spans[0].traceID) -//line app/vtselect/traces/jaeger/jaeger.qtpl:84 +//line jaeger.qtpl:109 qw422016.N().S(`,"warnings": null}`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:87 +//line jaeger.qtpl:112 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:87 +//line jaeger.qtpl:112 func writetraceJson(qq422016 qtio422016.Writer, trace *trace) { -//line app/vtselect/traces/jaeger/jaeger.qtpl:87 +//line jaeger.qtpl:112 qw422016 := qt422016.AcquireWriter(qq422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:87 +//line jaeger.qtpl:112 streamtraceJson(qw422016, trace) -//line app/vtselect/traces/jaeger/jaeger.qtpl:87 +//line jaeger.qtpl:112 qt422016.ReleaseWriter(qw422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:87 +//line jaeger.qtpl:112 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:87 +//line jaeger.qtpl:112 func traceJson(trace *trace) string { -//line app/vtselect/traces/jaeger/jaeger.qtpl:87 +//line jaeger.qtpl:112 qb422016 := qt422016.AcquireByteBuffer() -//line app/vtselect/traces/jaeger/jaeger.qtpl:87 +//line jaeger.qtpl:112 writetraceJson(qb422016, trace) -//line app/vtselect/traces/jaeger/jaeger.qtpl:87 +//line jaeger.qtpl:112 qs422016 := string(qb422016.B) -//line app/vtselect/traces/jaeger/jaeger.qtpl:87 +//line jaeger.qtpl:112 qt422016.ReleaseByteBuffer(qb422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:87 +//line jaeger.qtpl:112 return qs422016 -//line app/vtselect/traces/jaeger/jaeger.qtpl:87 +//line jaeger.qtpl:112 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:89 +//line jaeger.qtpl:114 func streamprocessJson(qw422016 *qt422016.Writer, process process) { -//line 
app/vtselect/traces/jaeger/jaeger.qtpl:89 +//line jaeger.qtpl:114 qw422016.N().S(`{"serviceName":`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:91 +//line jaeger.qtpl:116 qw422016.N().Q(process.serviceName) -//line app/vtselect/traces/jaeger/jaeger.qtpl:91 +//line jaeger.qtpl:116 qw422016.N().S(`,"tags": [`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:93 +//line jaeger.qtpl:118 if len(process.tags) > 0 { -//line app/vtselect/traces/jaeger/jaeger.qtpl:94 +//line jaeger.qtpl:119 streamtagJson(qw422016, process.tags[0]) -//line app/vtselect/traces/jaeger/jaeger.qtpl:95 +//line jaeger.qtpl:120 for _, v := range process.tags[1:] { -//line app/vtselect/traces/jaeger/jaeger.qtpl:95 +//line jaeger.qtpl:120 qw422016.N().S(`,`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:96 +//line jaeger.qtpl:121 streamtagJson(qw422016, v) -//line app/vtselect/traces/jaeger/jaeger.qtpl:97 +//line jaeger.qtpl:122 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:98 +//line jaeger.qtpl:123 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:98 +//line jaeger.qtpl:123 qw422016.N().S(`]}`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:101 +//line jaeger.qtpl:126 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:101 +//line jaeger.qtpl:126 func writeprocessJson(qq422016 qtio422016.Writer, process process) { -//line app/vtselect/traces/jaeger/jaeger.qtpl:101 +//line jaeger.qtpl:126 qw422016 := qt422016.AcquireWriter(qq422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:101 +//line jaeger.qtpl:126 streamprocessJson(qw422016, process) -//line app/vtselect/traces/jaeger/jaeger.qtpl:101 +//line jaeger.qtpl:126 qt422016.ReleaseWriter(qw422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:101 +//line jaeger.qtpl:126 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:101 +//line jaeger.qtpl:126 func processJson(process process) string { -//line app/vtselect/traces/jaeger/jaeger.qtpl:101 +//line jaeger.qtpl:126 qb422016 := qt422016.AcquireByteBuffer() -//line 
app/vtselect/traces/jaeger/jaeger.qtpl:101 +//line jaeger.qtpl:126 writeprocessJson(qb422016, process) -//line app/vtselect/traces/jaeger/jaeger.qtpl:101 +//line jaeger.qtpl:126 qs422016 := string(qb422016.B) -//line app/vtselect/traces/jaeger/jaeger.qtpl:101 +//line jaeger.qtpl:126 qt422016.ReleaseByteBuffer(qb422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:101 +//line jaeger.qtpl:126 return qs422016 -//line app/vtselect/traces/jaeger/jaeger.qtpl:101 +//line jaeger.qtpl:126 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:103 +//line jaeger.qtpl:128 func streamspanJson(qw422016 *qt422016.Writer, span *span) { -//line app/vtselect/traces/jaeger/jaeger.qtpl:103 +//line jaeger.qtpl:128 qw422016.N().S(`{"duration":`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:105 +//line jaeger.qtpl:130 qw422016.N().DL(span.duration) -//line app/vtselect/traces/jaeger/jaeger.qtpl:105 +//line jaeger.qtpl:130 qw422016.N().S(`,"logs":[`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:107 +//line jaeger.qtpl:132 if len(span.logs) > 0 { -//line app/vtselect/traces/jaeger/jaeger.qtpl:108 +//line jaeger.qtpl:133 streamlogJson(qw422016, span.logs[0]) -//line app/vtselect/traces/jaeger/jaeger.qtpl:109 +//line jaeger.qtpl:134 for _, v := range span.logs[1:] { -//line app/vtselect/traces/jaeger/jaeger.qtpl:109 +//line jaeger.qtpl:134 qw422016.N().S(`,`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:110 +//line jaeger.qtpl:135 streamlogJson(qw422016, v) -//line app/vtselect/traces/jaeger/jaeger.qtpl:111 +//line jaeger.qtpl:136 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:112 +//line jaeger.qtpl:137 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:112 +//line jaeger.qtpl:137 qw422016.N().S(`],"operationName":`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:114 +//line jaeger.qtpl:139 qw422016.N().Q(span.operationName) -//line app/vtselect/traces/jaeger/jaeger.qtpl:114 +//line jaeger.qtpl:139 qw422016.N().S(`,"processID":`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:115 +//line 
jaeger.qtpl:140 qw422016.N().Q(span.processID) -//line app/vtselect/traces/jaeger/jaeger.qtpl:115 +//line jaeger.qtpl:140 qw422016.N().S(`,"references": [`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:117 +//line jaeger.qtpl:142 if len(span.references) > 0 { -//line app/vtselect/traces/jaeger/jaeger.qtpl:118 +//line jaeger.qtpl:143 streamspanRefJson(qw422016, span.references[0]) -//line app/vtselect/traces/jaeger/jaeger.qtpl:119 +//line jaeger.qtpl:144 for _, v := range span.references[1:] { -//line app/vtselect/traces/jaeger/jaeger.qtpl:119 +//line jaeger.qtpl:144 qw422016.N().S(`,`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:120 +//line jaeger.qtpl:145 streamspanRefJson(qw422016, v) -//line app/vtselect/traces/jaeger/jaeger.qtpl:121 +//line jaeger.qtpl:146 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:122 +//line jaeger.qtpl:147 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:122 +//line jaeger.qtpl:147 qw422016.N().S(`],"spanID":`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:124 +//line jaeger.qtpl:149 qw422016.N().Q(span.spanID) -//line app/vtselect/traces/jaeger/jaeger.qtpl:124 +//line jaeger.qtpl:149 qw422016.N().S(`,"startTime":`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:125 +//line jaeger.qtpl:150 qw422016.N().DL(span.startTime) -//line app/vtselect/traces/jaeger/jaeger.qtpl:125 +//line jaeger.qtpl:150 qw422016.N().S(`,"tags": [`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:127 +//line jaeger.qtpl:152 if len(span.tags) > 0 { -//line app/vtselect/traces/jaeger/jaeger.qtpl:128 +//line jaeger.qtpl:153 streamtagJson(qw422016, span.tags[0]) -//line app/vtselect/traces/jaeger/jaeger.qtpl:129 +//line jaeger.qtpl:154 for _, v := range span.tags[1:] { -//line app/vtselect/traces/jaeger/jaeger.qtpl:129 +//line jaeger.qtpl:154 qw422016.N().S(`,`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:130 +//line jaeger.qtpl:155 streamtagJson(qw422016, v) -//line app/vtselect/traces/jaeger/jaeger.qtpl:131 +//line jaeger.qtpl:156 } -//line 
app/vtselect/traces/jaeger/jaeger.qtpl:132 +//line jaeger.qtpl:157 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:132 +//line jaeger.qtpl:157 qw422016.N().S(`],"traceID":`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:134 +//line jaeger.qtpl:159 qw422016.N().Q(span.traceID) -//line app/vtselect/traces/jaeger/jaeger.qtpl:134 +//line jaeger.qtpl:159 qw422016.N().S(`,"warnings":null}`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:137 +//line jaeger.qtpl:162 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:137 +//line jaeger.qtpl:162 func writespanJson(qq422016 qtio422016.Writer, span *span) { -//line app/vtselect/traces/jaeger/jaeger.qtpl:137 +//line jaeger.qtpl:162 qw422016 := qt422016.AcquireWriter(qq422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:137 +//line jaeger.qtpl:162 streamspanJson(qw422016, span) -//line app/vtselect/traces/jaeger/jaeger.qtpl:137 +//line jaeger.qtpl:162 qt422016.ReleaseWriter(qw422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:137 +//line jaeger.qtpl:162 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:137 +//line jaeger.qtpl:162 func spanJson(span *span) string { -//line app/vtselect/traces/jaeger/jaeger.qtpl:137 +//line jaeger.qtpl:162 qb422016 := qt422016.AcquireByteBuffer() -//line app/vtselect/traces/jaeger/jaeger.qtpl:137 +//line jaeger.qtpl:162 writespanJson(qb422016, span) -//line app/vtselect/traces/jaeger/jaeger.qtpl:137 +//line jaeger.qtpl:162 qs422016 := string(qb422016.B) -//line app/vtselect/traces/jaeger/jaeger.qtpl:137 +//line jaeger.qtpl:162 qt422016.ReleaseByteBuffer(qb422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:137 +//line jaeger.qtpl:162 return qs422016 -//line app/vtselect/traces/jaeger/jaeger.qtpl:137 +//line jaeger.qtpl:162 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:139 +//line jaeger.qtpl:164 func streamtagJson(qw422016 *qt422016.Writer, tag keyValue) { -//line app/vtselect/traces/jaeger/jaeger.qtpl:139 +//line jaeger.qtpl:164 qw422016.N().S(`{"key":`) -//line 
app/vtselect/traces/jaeger/jaeger.qtpl:141 +//line jaeger.qtpl:166 qw422016.N().Q(tag.key) -//line app/vtselect/traces/jaeger/jaeger.qtpl:141 +//line jaeger.qtpl:166 qw422016.N().S(`,"type":"string","value":`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:143 +//line jaeger.qtpl:168 qw422016.N().Q(tag.vStr) -//line app/vtselect/traces/jaeger/jaeger.qtpl:143 +//line jaeger.qtpl:168 qw422016.N().S(`}`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:145 +//line jaeger.qtpl:170 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:145 +//line jaeger.qtpl:170 func writetagJson(qq422016 qtio422016.Writer, tag keyValue) { -//line app/vtselect/traces/jaeger/jaeger.qtpl:145 +//line jaeger.qtpl:170 qw422016 := qt422016.AcquireWriter(qq422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:145 +//line jaeger.qtpl:170 streamtagJson(qw422016, tag) -//line app/vtselect/traces/jaeger/jaeger.qtpl:145 +//line jaeger.qtpl:170 qt422016.ReleaseWriter(qw422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:145 +//line jaeger.qtpl:170 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:145 +//line jaeger.qtpl:170 func tagJson(tag keyValue) string { -//line app/vtselect/traces/jaeger/jaeger.qtpl:145 +//line jaeger.qtpl:170 qb422016 := qt422016.AcquireByteBuffer() -//line app/vtselect/traces/jaeger/jaeger.qtpl:145 +//line jaeger.qtpl:170 writetagJson(qb422016, tag) -//line app/vtselect/traces/jaeger/jaeger.qtpl:145 +//line jaeger.qtpl:170 qs422016 := string(qb422016.B) -//line app/vtselect/traces/jaeger/jaeger.qtpl:145 +//line jaeger.qtpl:170 qt422016.ReleaseByteBuffer(qb422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:145 +//line jaeger.qtpl:170 return qs422016 -//line app/vtselect/traces/jaeger/jaeger.qtpl:145 +//line jaeger.qtpl:170 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:147 +//line jaeger.qtpl:172 func streamlogJson(qw422016 *qt422016.Writer, l log) { -//line app/vtselect/traces/jaeger/jaeger.qtpl:147 +//line jaeger.qtpl:172 qw422016.N().S(`{"timestamp":`) -//line 
app/vtselect/traces/jaeger/jaeger.qtpl:149 +//line jaeger.qtpl:174 qw422016.N().DL(l.timestamp) -//line app/vtselect/traces/jaeger/jaeger.qtpl:149 +//line jaeger.qtpl:174 qw422016.N().S(`,"fields":[`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:151 +//line jaeger.qtpl:176 if len(l.fields) > 0 { -//line app/vtselect/traces/jaeger/jaeger.qtpl:152 +//line jaeger.qtpl:177 streamtagJson(qw422016, l.fields[0]) -//line app/vtselect/traces/jaeger/jaeger.qtpl:153 +//line jaeger.qtpl:178 for _, v := range l.fields[1:] { -//line app/vtselect/traces/jaeger/jaeger.qtpl:153 +//line jaeger.qtpl:178 qw422016.N().S(`,`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:154 +//line jaeger.qtpl:179 streamtagJson(qw422016, v) -//line app/vtselect/traces/jaeger/jaeger.qtpl:155 +//line jaeger.qtpl:180 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:156 +//line jaeger.qtpl:181 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:156 +//line jaeger.qtpl:181 qw422016.N().S(`]}`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:159 +//line jaeger.qtpl:184 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:159 +//line jaeger.qtpl:184 func writelogJson(qq422016 qtio422016.Writer, l log) { -//line app/vtselect/traces/jaeger/jaeger.qtpl:159 +//line jaeger.qtpl:184 qw422016 := qt422016.AcquireWriter(qq422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:159 +//line jaeger.qtpl:184 streamlogJson(qw422016, l) -//line app/vtselect/traces/jaeger/jaeger.qtpl:159 +//line jaeger.qtpl:184 qt422016.ReleaseWriter(qw422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:159 +//line jaeger.qtpl:184 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:159 +//line jaeger.qtpl:184 func logJson(l log) string { -//line app/vtselect/traces/jaeger/jaeger.qtpl:159 +//line jaeger.qtpl:184 qb422016 := qt422016.AcquireByteBuffer() -//line app/vtselect/traces/jaeger/jaeger.qtpl:159 +//line jaeger.qtpl:184 writelogJson(qb422016, l) -//line app/vtselect/traces/jaeger/jaeger.qtpl:159 +//line jaeger.qtpl:184 qs422016 := 
string(qb422016.B) -//line app/vtselect/traces/jaeger/jaeger.qtpl:159 +//line jaeger.qtpl:184 qt422016.ReleaseByteBuffer(qb422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:159 +//line jaeger.qtpl:184 return qs422016 -//line app/vtselect/traces/jaeger/jaeger.qtpl:159 +//line jaeger.qtpl:184 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:161 +//line jaeger.qtpl:186 func streamspanRefJson(qw422016 *qt422016.Writer, ref spanRef) { -//line app/vtselect/traces/jaeger/jaeger.qtpl:161 +//line jaeger.qtpl:186 qw422016.N().S(`{"refType":`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:163 +//line jaeger.qtpl:188 qw422016.N().Q(ref.refType) -//line app/vtselect/traces/jaeger/jaeger.qtpl:163 +//line jaeger.qtpl:188 qw422016.N().S(`,"spanID":`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:164 +//line jaeger.qtpl:189 qw422016.N().Q(ref.spanID) -//line app/vtselect/traces/jaeger/jaeger.qtpl:164 +//line jaeger.qtpl:189 qw422016.N().S(`,"traceID":`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:165 +//line jaeger.qtpl:190 qw422016.N().Q(ref.traceID) -//line app/vtselect/traces/jaeger/jaeger.qtpl:165 +//line jaeger.qtpl:190 qw422016.N().S(`}`) -//line app/vtselect/traces/jaeger/jaeger.qtpl:167 +//line jaeger.qtpl:192 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:167 +//line jaeger.qtpl:192 func writespanRefJson(qq422016 qtio422016.Writer, ref spanRef) { -//line app/vtselect/traces/jaeger/jaeger.qtpl:167 +//line jaeger.qtpl:192 qw422016 := qt422016.AcquireWriter(qq422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:167 +//line jaeger.qtpl:192 streamspanRefJson(qw422016, ref) -//line app/vtselect/traces/jaeger/jaeger.qtpl:167 +//line jaeger.qtpl:192 qt422016.ReleaseWriter(qw422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:167 +//line jaeger.qtpl:192 } -//line app/vtselect/traces/jaeger/jaeger.qtpl:167 +//line jaeger.qtpl:192 func spanRefJson(ref spanRef) string { -//line app/vtselect/traces/jaeger/jaeger.qtpl:167 +//line jaeger.qtpl:192 qb422016 := 
qt422016.AcquireByteBuffer() -//line app/vtselect/traces/jaeger/jaeger.qtpl:167 +//line jaeger.qtpl:192 writespanRefJson(qb422016, ref) -//line app/vtselect/traces/jaeger/jaeger.qtpl:167 +//line jaeger.qtpl:192 qs422016 := string(qb422016.B) -//line app/vtselect/traces/jaeger/jaeger.qtpl:167 +//line jaeger.qtpl:192 qt422016.ReleaseByteBuffer(qb422016) -//line app/vtselect/traces/jaeger/jaeger.qtpl:167 +//line jaeger.qtpl:192 return qs422016 -//line app/vtselect/traces/jaeger/jaeger.qtpl:167 +//line jaeger.qtpl:192 } diff --git a/app/vtselect/traces/jaeger/model.go b/app/vtselect/traces/jaeger/model.go index 735cdd8f6..18c1afe20 100644 --- a/app/vtselect/traces/jaeger/model.go +++ b/app/vtselect/traces/jaeger/model.go @@ -56,6 +56,12 @@ type log struct { fields []keyValue } +type dependencyLink struct { + parent string + child string + callCount uint64 +} + // since Jaeger renamed some fields in OpenTelemetry // into other span attributes during query, the following map // is created to translate the span attributes filter into the diff --git a/app/vtselect/traces/query/query.go b/app/vtselect/traces/query/query.go index 22796b693..bfe7196b0 100644 --- a/app/vtselect/traces/query/query.go +++ b/app/vtselect/traces/query/query.go @@ -31,6 +31,8 @@ var ( "This limit affects Jaeger's /api/services API.") traceMaxSpanNameList = flag.Uint64("search.traceMaxSpanNameList", 1000, "The maximum number of span name can return in a get span name request. "+ "This limit affects Jaeger's /api/services/*/operations API.") + traceMaxDependencyList = flag.Uint64("search.traceMaxDependencyList", 0, "The maximum number of dependency links can return in a get dependencies request. "+ + "This limit affects Jaeger's /api/dependencies API. 
Not limited by default.") ) var ( @@ -566,3 +568,90 @@ func checkTraceIDList(traceIDList []string) []string { } return result } + +type DependenciesQueryParameters struct { + EndTs time.Time + Lookback time.Duration +} + +// GetDependencyList returns service dependencies graph edges (parent, child, callCount) in []*Row format. +func GetDependencyList(ctx context.Context, cp *CommonParams, param *DependenciesQueryParameters) ([]*Row, error) { + qStrParentSpans := fmt.Sprintf( + `NOT %s:"" | fields %s, %s | rename %s as %s, %s as child`, + otelpb.ParentSpanIDField, + otelpb.ParentSpanIDField, + otelpb.ResourceAttrServiceName, + otelpb.ParentSpanIDField, + otelpb.SpanIDField, + otelpb.ResourceAttrServiceName, + ) + qStrChildSpans := fmt.Sprintf( + `NOT %s:"" | fields %s, %s | rename %s as parent`, + otelpb.SpanIDField, + otelpb.SpanIDField, + otelpb.ResourceAttrServiceName, + otelpb.ResourceAttrServiceName, + ) + qStr := fmt.Sprintf( + `%s | join by (%s) (%s) inner | NOT parent:eq_field(child) | stats by (parent, child) count() callCount`, + qStrParentSpans, + otelpb.SpanIDField, + qStrChildSpans, + ) + + startTime := param.EndTs.Add(-param.Lookback).UnixNano() + endTime := param.EndTs.UnixNano() + + q, err := logstorage.ParseQueryAtTimestamp(qStr, endTime) + if err != nil { + return nil, fmt.Errorf("cannot parse query [%s]: %s", qStr, err) + } + q.AddTimeFilter(startTime, endTime) + if *traceMaxDependencyList > 0 { + q.AddPipeLimit(*traceMaxDependencyList) + } + + var rowsLock sync.Mutex + var rows []*Row + //var missingTimeColumn atomic.Bool + writeBlock := func(_ uint, db *logstorage.DataBlock) { + columns := db.Columns + if len(columns) == 0 { + return + } + clonedColumnNames := make([]string, len(columns)) + valuesCount := 0 + for i, c := range columns { + clonedColumnNames[i] = strings.Clone(c.Name) + if len(c.Values) > valuesCount { + valuesCount = len(c.Values) + } + } + if valuesCount == 0 { + return + } + for i := 0; i < valuesCount; i++ { + fields := 
make([]logstorage.Field, 0, len(columns)) + for j := range columns { + fields = append( + fields, + logstorage.Field{ + Name: clonedColumnNames[j], + Value: strings.Clone(columns[j].Values[i]), + }, + ) + } + rowsLock.Lock() + rows = append(rows, &Row{ + Fields: fields, + }) + rowsLock.Unlock() + } + } + + if err = vtstorage.RunQuery(ctx, cp.TenantIDs, q, writeBlock); err != nil { + return nil, err + } + + return rows, nil +} diff --git a/docs/victoriatraces/README.md b/docs/victoriatraces/README.md index b7e3473b1..1bf71dbde 100644 --- a/docs/victoriatraces/README.md +++ b/docs/victoriatraces/README.md @@ -513,6 +513,8 @@ It is recommended protecting internal HTTP endpoints from unauthorized access: The maximum number of service name can return in a get service name request. This limit affects Jaeger's /api/services API. (default 1000) -search.traceMaxSpanNameList uint The maximum number of span name can return in a get span name request. This limit affects Jaeger's /api/services/*/operations API. (default 1000) + -search.traceMaxDependencyList uint + The maximum number of dependency links can return in a get dependencies request. This limit affects Jaeger's /api/dependencies API. Not limited by default. (default 0) -search.traceSearchStep duration Splits the [0, now] time range into many small time ranges by -search.traceSearchStep when searching for spans by trace_id. Once it finds spans in a time range, it performs an additional search according to -search.traceMaxDurationWindow and then stops. It affects Jaeger's /api/traces/ API. 
(default 24h0m0s) -search.traceServiceAndSpanNameLookbehind duration diff --git a/docs/victoriatraces/changelog/CHANGELOG.md b/docs/victoriatraces/changelog/CHANGELOG.md index e58aedddb..1409b66f0 100644 --- a/docs/victoriatraces/changelog/CHANGELOG.md +++ b/docs/victoriatraces/changelog/CHANGELOG.md @@ -17,6 +17,7 @@ The following `tip` changes can be tested by building VictoriaTraces components * FEATURE: [logstorage](https://docs.victoriametrics.com/victorialogs/): Upgrade VictoriaLogs dependency from [v1.27.0 to v1.33.1](https://github.com/VictoriaMetrics/VictoriaLogs/compare/v1.27.0...v1.33.1). * FEATURE: [docker compose](https://github.com/VictoriaMetrics/VictoriaTraces/tree/master/deployment/docker): add cluster docker compose environment. * FEATURE: [dashboards](https://github.com/VictoriaMetrics/VictoriaTraces/blob/master/dashboards): update dashboard for VictoriaTraces single-node and cluster to provide more charts. +* FEATURE: implemented jaeger dependencies graph API (`/select/jaeger/api/dependencies`). * BUGFIX: [Single-node VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) and vtinsert in [VictoriaTraces cluster](https://docs.victoriametrics.com/victoriatraces/cluster/): Rename various [HTTP headers](https://docs.victoriametrics.com/victoriatraces/data-ingestion/#http-headers) prefix from `VL-` to `VT-`. These headers help with debugging and customizing stream fields. Thank @JayiceZ for [the pull request](https://github.com/VictoriaMetrics/VictoriaTraces/pull/56). * BUGFIX: all components: properly expose metadata for summaries and histograms in VictoriaMetrics components with enabled `-metrics.exposeMetadata` cmd-line flag. See [metrics#98](https://github.com/VictoriaMetrics/metrics/issues/98) for details. 
diff --git a/docs/victoriatraces/querying/README.md b/docs/victoriatraces/querying/README.md index 8f0fd3e9a..8620e8be6 100644 --- a/docs/victoriatraces/querying/README.md +++ b/docs/victoriatraces/querying/README.md @@ -38,6 +38,7 @@ Additionally, the following Jaeger HTTP endpoints are available: - `/select/jaeger/api/services/{service_name}/operations` for querying all the span names of a service. - [`/select/jaeger/api/traces`](#querying-traces) for querying traces. - `/select/jaeger/api/traces/{trace_id}` for querying a trace. +- `/select/jaeger/api/dependencies` for querying dependencies graph for services. ### Querying traces From a34c78206b602c723c32a92fd88d83219556b451 Mon Sep 17 00:00:00 2001 From: Alexander Marshalov <_@marshalov.org> Date: Tue, 9 Sep 2025 11:01:37 +0200 Subject: [PATCH 02/26] Fix formatting in README.md for dependency links --- docs/victoriatraces/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/victoriatraces/README.md b/docs/victoriatraces/README.md index 1bf71dbde..5ab05c7ab 100644 --- a/docs/victoriatraces/README.md +++ b/docs/victoriatraces/README.md @@ -514,7 +514,7 @@ It is recommended protecting internal HTTP endpoints from unauthorized access: -search.traceMaxSpanNameList uint The maximum number of span name can return in a get span name request. This limit affects Jaeger's /api/services/*/operations API. (default 1000) -search.traceMaxDependencyList uint - The maximum number of dependency links can return in a get dependencies request. This limit affects Jaeger's /api/dependencies API. Not limited by default. (default 0) + The maximum number of dependency links can return in a get dependencies request. This limit affects Jaeger's /api/dependencies API. Not limited by default. (default 0) -search.traceSearchStep duration Splits the [0, now] time range into many small time ranges by -search.traceSearchStep when searching for spans by trace_id. 
Once it finds spans in a time range, it performs an additional search according to -search.traceMaxDurationWindow and then stops. It affects Jaeger's /api/traces/ API. (default 24h0m0s) -search.traceServiceAndSpanNameLookbehind duration From 26922140adcbdde6289425a308559f4159123214 Mon Sep 17 00:00:00 2001 From: Alexander Marshalov Date: Mon, 15 Sep 2025 09:47:57 +0200 Subject: [PATCH 03/26] fix after rebasing master --- app/vtselect/traces/query/query.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/app/vtselect/traces/query/query.go b/app/vtselect/traces/query/query.go index bfe7196b0..c82a44973 100644 --- a/app/vtselect/traces/query/query.go +++ b/app/vtselect/traces/query/query.go @@ -608,8 +608,10 @@ func GetDependencyList(ctx context.Context, cp *CommonParams, param *Dependencie } q.AddTimeFilter(startTime, endTime) if *traceMaxDependencyList > 0 { - q.AddPipeLimit(*traceMaxDependencyList) + q.AddPipeOffsetLimit(0, *traceMaxDependencyList) } + cp.Query = q + qctx := cp.NewQueryContext(ctx) var rowsLock sync.Mutex var rows []*Row @@ -649,7 +651,7 @@ func GetDependencyList(ctx context.Context, cp *CommonParams, param *Dependencie } } - if err = vtstorage.RunQuery(ctx, cp.TenantIDs, q, writeBlock); err != nil { + if err = vtstorage.RunQuery(qctx, writeBlock); err != nil { return nil, err } From d305081a1f716ddfd32280d61c1020dece9b52d1 Mon Sep 17 00:00:00 2001 From: Jiekun Date: Fri, 19 Sep 2025 00:22:09 +0800 Subject: [PATCH 04/26] feature: [vtgen] improve trace span_id generation logic --- app/vtgen/main.go | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/app/vtgen/main.go b/app/vtgen/main.go index db2a33e33..b0ddcf85e 100644 --- a/app/vtgen/main.go +++ b/app/vtgen/main.go @@ -68,6 +68,7 @@ func main() { // The traceIDMap recorded old traceID->new traceID. // Spans with same old traceID should be replaced with same new traceID. 
traceIDMap := make(map[string]string) + spanIDMap := make(map[string]string) // The timeOffset is the time offset of span timestamp and current timestamp. // All spans' timestamp should be increased by this offset. @@ -115,6 +116,26 @@ func main() { } } + // replace SpanID and parentSpanID + if sid, ok := spanIDMap[sp.SpanID]; ok { + sp.SpanID = sid + } else { + spanID := generateSpanID() + oldSpanID := sp.SpanID + sp.SpanID = spanID + spanIDMap[oldSpanID] = spanID + } + + // replace SpanID and parentSpanID + if sid, ok := spanIDMap[sp.ParentSpanID]; ok { + sp.ParentSpanID = sid + } else { + parentSpanID := generateSpanID() + oldParentSpanID := sp.ParentSpanID + sp.ParentSpanID = parentSpanID + spanIDMap[oldParentSpanID] = parentSpanID + } + // adjust the timestamp of the span. sp.StartTimeUnixNano = sp.StartTimeUnixNano + timeOffset sp.EndTimeUnixNano = sp.EndTimeUnixNano + timeOffset + uint64(rand.Int63n(100000000)) @@ -204,6 +225,12 @@ func generateTraceID() string { return hex.EncodeToString(h.Sum(nil)) } +func generateSpanID() string { + h := md5.New() + h.Write([]byte(strconv.FormatInt(time.Now().UnixNano(), 10))) + return hex.EncodeToString(h.Sum(nil))[:16] +} + // readWrite Does the following: // 1. read request body binary files like `1.bin`, `2.bin` and puts them into `BodyList`. // 2. encode and compress the `BodyList` into `[]byte`. 
From b9f10d9f8a8e66e627e9156d6ae007c257a19301 Mon Sep 17 00:00:00 2001 From: Jiekun Date: Fri, 19 Sep 2025 00:23:49 +0800 Subject: [PATCH 05/26] feature: [vtselect] limit the lookbehind window for dependency API to 1m, and provide flag for configuration --- app/vtselect/traces/jaeger/jaeger.go | 2 +- app/vtselect/traces/query/query.go | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/app/vtselect/traces/jaeger/jaeger.go b/app/vtselect/traces/jaeger/jaeger.go index 98833f181..6b78640b4 100644 --- a/app/vtselect/traces/jaeger/jaeger.go +++ b/app/vtselect/traces/jaeger/jaeger.go @@ -462,7 +462,7 @@ func parseJaegerDependenciesQueryParam(_ context.Context, r *http.Request) (*que // default params p := &query.DependenciesQueryParameters{ EndTs: time.Now(), - Lookback: time.Hour * 24, + Lookback: time.Minute * 1, } q := r.URL.Query() diff --git a/app/vtselect/traces/query/query.go b/app/vtselect/traces/query/query.go index c82a44973..c5a0cb1ec 100644 --- a/app/vtselect/traces/query/query.go +++ b/app/vtselect/traces/query/query.go @@ -31,8 +31,10 @@ var ( "This limit affects Jaeger's /api/services API.") traceMaxSpanNameList = flag.Uint64("search.traceMaxSpanNameList", 1000, "The maximum number of span name can return in a get span name request. "+ "This limit affects Jaeger's /api/services/*/operations API.") - traceMaxDependencyList = flag.Uint64("search.traceMaxDependencyList", 0, "The maximum number of dependency links can return in a get dependencies request. "+ - "This limit affects Jaeger's /api/dependencies API. Not limited by default.") + traceMaxDependencyList = flag.Uint64("search.traceMaxDependencyList", 1000, "The maximum number of dependency links can return in a get dependencies request. "+ + "This limit affects Jaeger's /api/dependencies API.") + traceMaxDependencyLookBehind = flag.Duration("search.traceMaxDependencyLookbehind", 1*time.Minute, "The maximum window for dependency analysis in real-time. 
"+ + "Increasing this duration will allow analysis across a longer time range, but it will increase the risk of performance degradation and higher resource usage.") ) var ( @@ -599,6 +601,9 @@ func GetDependencyList(ctx context.Context, cp *CommonParams, param *Dependencie qStrChildSpans, ) + if *traceMaxDependencyLookBehind > 0 && param.Lookback > *traceMaxDependencyLookBehind { + param.Lookback = *traceMaxDependencyLookBehind + } startTime := param.EndTs.Add(-param.Lookback).UnixNano() endTime := param.EndTs.UnixNano() From 388e6806ffacc110e28a60d5a72290ddf16a0c3f Mon Sep 17 00:00:00 2001 From: Jiekun Date: Fri, 19 Sep 2025 00:50:26 +0800 Subject: [PATCH 06/26] feature: [dependency] add API doc --- docs/victoriatraces/querying/README.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/docs/victoriatraces/querying/README.md b/docs/victoriatraces/querying/README.md index 8620e8be6..3bbaef592 100644 --- a/docs/victoriatraces/querying/README.md +++ b/docs/victoriatraces/querying/README.md @@ -118,3 +118,25 @@ Here's a response example: ```json {"data":[{"processes":{"p1":{"serviceName":"email","tags":[{"key":"process.command","type":"string","value":"email_server.rb"},{"key":"process.pid","type":"string","value":"1"},{"key":"process.runtime.description","type":"string","value":"ruby 3.4.4 (2025-05-14 revision a38531fd3f) +PRISM 
[aarch64-linux-musl]"},{"key":"process.runtime.name","type":"string","value":"ruby"},{"key":"process.runtime.version","type":"string","value":"3.4.4"},{"key":"service.namespace","type":"string","value":"opentelemetry-demo"},{"key":"service.version","type":"string","value":"2.0.2"},{"key":"telemetry.sdk.language","type":"string","value":"ruby"},{"key":"telemetry.sdk.name","type":"string","value":"opentelemetry"},{"key":"telemetry.sdk.version","type":"string","value":"1.8.0"}]},"p10":{"serviceName":"load-generator","tags":[{"key":"service.namespace","type":"string","value":"opentelemetry-demo"},{"key":"service.version","type":"string","value":"2.0.2"},{"key":"telemetry.sdk.language","type":"string","value":"python"},{"key":"telemetry.sdk.name","type":"string","value":"opentelemetry"},{"key":"telemetry.sdk.version","type":"string","value":"1.34.0"}]},"p11":{"serviceName":"product-catalog","tags":[{"key":"host.name","type":"string","value":"3dabfcfe8381"},{"key":"os.description","type":"string","value":"Debian GNU/Linux Debian GNU/Linux 12 (bookworm) (Linux 3dabfcfe8381 6.10.14-linuxkit #1 SMP Tue Apr 15 16:00:54 UTC 2025 aarch64)"},{"key":"os.type","type":"string","value":"linux"},{"key":"process.command_args","type":"string","value":"[\"./product-catalog\"]"},{"key":"process.executable.name","type":"string","value":"product-catalog"},{"key":"process.executable.path","type":"string","value":"/usr/src/app/product-catalog"},{"key":"process.owner","type":"string","value":"nonroot"},{"key":"process.pid","type":"string","value":"1"},{"key":"process.runtime.description","type":"string","value":"go version go1.24.4 
linux/arm64"},{"key":"process.runtime.name","type":"string","value":"go"},{"key":"process.runtime.version","type":"string","value":"go1.24.4"},{"key":"service.namespace","type":"string","value":"opentelemetry-demo"},{"key":"service.version","type":"string","value":"2.0.2"},{"key":"telemetry.sdk.language","type":"string","value":"go"},{"key":"telemetry.sdk.name","type":"string","value":"opentelemetry"},{"key":"telemetry.sdk.version","type":"string","value":"1.36.0"}]},"p12":{"serviceName":"currency","tags":[{"key":"service.namespace","type":"string","value":"opentelemetry-demo"},{"key":"service.version","type":"string","value":"2.0.2"},{"key":"telemetry.sdk.language","type":"string","value":"cpp"},{"key":"telemetry.sdk.name","type":"string","value":"opentelemetry"},{"key":"telemetry.sdk.version","type":"string","value":"1.20.0"}]},"p2":{"serviceName":"quote","tags":[{"key":"container.id","type":"string","value":"759183873eeb1328f16df8ea5b5a10932506af136a6537c6a365131c04f1645c"},{"key":"host.arch","type":"string","value":"aarch64"},{"key":"host.name","type":"string","value":"759183873eeb"},{"key":"os.description","type":"string","value":"6.10.14-linuxkit"},{"key":"os.name","type":"string","value":"Linux"},{"key":"os.type","type":"string","value":"linux"},{"key":"os.version","type":"string","value":"#1 SMP Tue Apr 15 16:00:54 UTC 
2025"},{"key":"process.command","type":"string","value":"public/index.php"},{"key":"process.command_args","type":"string","value":"[\"public/index.php\"]"},{"key":"process.executable.path","type":"string","value":"/usr/local/bin/php"},{"key":"process.owner","type":"string","value":"www-data"},{"key":"process.pid","type":"string","value":"1"},{"key":"process.runtime.name","type":"string","value":"cli"},{"key":"process.runtime.version","type":"string","value":"8.3.22"},{"key":"service.instance.id","type":"string","value":"9dc0abaa-c408-483e-9fed-8375a73efb91"},{"key":"service.namespace","type":"string","value":"opentelemetry-demo"},{"key":"service.version","type":"string","value":"2.0.2"},{"key":"telemetry.distro.name","type":"string","value":"opentelemetry-php-instrumentation"},{"key":"telemetry.distro.version","type":"string","value":"1.1.3"},{"key":"telemetry.sdk.language","type":"string","value":"php"},{"key":"telemetry.sdk.name","type":"string","value":"opentelemetry"},{"key":"telemetry.sdk.version","type":"string","value":"1.5.0"}]},"p3":{"serviceName":"frontend","tags":[{"key":"container.id","type":"string","value":"2d395f01353040612a00252cf6e8c32f00ab94ae06f82f143a3ea9c742072674"},{"key":"host.arch","type":"string","value":"arm64"},{"key":"host.name","type":"string","value":"2d395f013530"},{"key":"os.type","type":"string","value":"linux"},{"key":"os.version","type":"string","value":"6.10.14-linuxkit"},{"key":"process.command","type":"string","value":"/app/server.js"},{"key":"process.command_args","type":"string","value":"[\"/usr/local/bin/node\",\"--require\",\"./Instrumentation.js\",\"/app/server.js\"]"},{"key":"process.executable.name","type":"string","value":"node"},{"key":"process.executable.path","type":"string","value":"/usr/local/bin/node"},{"key":"process.owner","type":"string","value":"nextjs"},{"key":"process.pid","type":"string","value":"17"},{"key":"process.runtime.description","type":"string","value":"Node.js"},{"key":"process.runtime.name","type"
:"string","value":"nodejs"},{"key":"process.runtime.version","type":"string","value":"22.16.0"},{"key":"service.namespace","type":"string","value":"opentelemetry-demo"},{"key":"service.version","type":"string","value":"2.0.2"},{"key":"telemetry.sdk.language","type":"string","value":"nodejs"},{"key":"telemetry.sdk.name","type":"string","value":"opentelemetry"},{"key":"telemetry.sdk.version","type":"string","value":"1.30.1"}]},"p4":{"serviceName":"payment","tags":[{"key":"container.id","type":"string","value":"18ee03279d38ed0e0eedad037c260df78dfc3323aa662ca14a2d38fcc8bf3762"},{"key":"host.arch","type":"string","value":"arm64"},{"key":"host.name","type":"string","value":"18ee03279d38"},{"key":"os.type","type":"string","value":"linux"},{"key":"os.version","type":"string","value":"6.10.14-linuxkit"},{"key":"process.command","type":"string","value":"/usr/src/app/index.js"},{"key":"process.command_args","type":"string","value":"[\"/usr/local/bin/node\",\"--require\",\"./opentelemetry.js\",\"/usr/src/app/index.js\"]"},{"key":"process.executable.name","type":"string","value":"node"},{"key":"process.executable.path","type":"string","value":"/usr/local/bin/node"},{"key":"process.owner","type":"string","value":"node"},{"key":"process.pid","type":"string","value":"17"},{"key":"process.runtime.description","type":"string","value":"Node.js"},{"key":"process.runtime.name","type":"string","value":"nodejs"},{"key":"process.runtime.version","type":"string","value":"22.16.0"},{"key":"service.namespace","type":"string","value":"opentelemetry-demo"},{"key":"service.version","type":"string","value":"2.0.2"},{"key":"telemetry.sdk.language","type":"string","value":"nodejs"},{"key":"telemetry.sdk.name","type":"string","value":"opentelemetry"},{"key":"telemetry.sdk.version","type":"string","value":"1.30.1"}]},"p5":{"serviceName":"flagd","tags":[{"key":"host.name","type":"string","value":"1f315d8a0f78"},{"key":"os.description","type":"string","value":"Debian GNU/Linux Debian GNU/Linux 12 
(bookworm) (Linux 1f315d8a0f78 6.10.14-linuxkit #1 SMP Tue Apr 15 16:00:54 UTC 2025 aarch64)"},{"key":"os.type","type":"string","value":"linux"},{"key":"process.runtime.version","type":"string","value":"go1.24.1"},{"key":"service.namespace","type":"string","value":"opentelemetry-demo"},{"key":"service.version","type":"string","value":"v0.12.3"},{"key":"telemetry.sdk.language","type":"string","value":"go"},{"key":"telemetry.sdk.name","type":"string","value":"opentelemetry"},{"key":"telemetry.sdk.version","type":"string","value":"1.35.0"}]},"p6":{"serviceName":"shipping","tags":[{"key":"os.type","type":"string","value":"linux"},{"key":"process.command_args","type":"string","value":"[\"/app/shipping\"]"},{"key":"process.pid","type":"string","value":"1"},{"key":"process.runtime.description","type":"string","value":"rustc 1.82.0 (f6e511eec 2024-10-15)"},{"key":"process.runtime.name","type":"string","value":"rustc"},{"key":"process.runtime.version","type":"string","value":"1.82.0"},{"key":"service.namespace","type":"string","value":"opentelemetry-demo"},{"key":"service.version","type":"string","value":"2.0.2"},{"key":"telemetry.sdk.language","type":"string","value":"rust"},{"key":"telemetry.sdk.name","type":"string","value":"opentelemetry"},{"key":"telemetry.sdk.version","type":"string","value":"0.30.0"}]},"p7":{"serviceName":"checkout","tags":[{"key":"host.name","type":"string","value":"cbdb5e0808c2"},{"key":"os.description","type":"string","value":"Debian GNU/Linux Debian GNU/Linux 12 (bookworm) (Linux cbdb5e0808c2 6.10.14-linuxkit #1 SMP Tue Apr 15 16:00:54 UTC 2025 
aarch64)"},{"key":"os.type","type":"string","value":"linux"},{"key":"process.command_args","type":"string","value":"[\"./checkout\"]"},{"key":"process.executable.name","type":"string","value":"checkout"},{"key":"process.executable.path","type":"string","value":"/usr/src/app/checkout"},{"key":"process.owner","type":"string","value":"nonroot"},{"key":"process.pid","type":"string","value":"1"},{"key":"process.runtime.description","type":"string","value":"go version go1.24.4 linux/arm64"},{"key":"process.runtime.name","type":"string","value":"go"},{"key":"process.runtime.version","type":"string","value":"go1.24.4"},{"key":"service.namespace","type":"string","value":"opentelemetry-demo"},{"key":"service.version","type":"string","value":"2.0.2"},{"key":"telemetry.sdk.language","type":"string","value":"go"},{"key":"telemetry.sdk.name","type":"string","value":"opentelemetry"},{"key":"telemetry.sdk.version","type":"string","value":"1.36.0"}]},"p8":{"serviceName":"frontend-proxy","tags":[{"key":"service.namespace","type":"string","value":"opentelemetry-demo"},{"key":"service.version","type":"string","value":"2.0.2"}]},"p9":{"serviceName":"cart","tags":[{"key":"container.id","type":"string","value":"5603ff989877ecf311403b6ea81fda10734846a0cbdad3a09c39fb068e4a07fc"},{"key":"host.name","type":"string","value":"5603ff989877"},{"key":"service.namespace","type":"string","value":"opentelemetry-demo"},{"key":"service.version","type":"string","value":"2.0.2"},{"key":"telemetry.sdk.language","type":"string","value":"dotnet"},{"key":"telemetry.sdk.name","type":"string","value":"opentelemetry"},{"key":"telemetry.sdk.version","type":"string","value":"1.11.2"}]}},"spans":[{"duration":4935,"logs":[],"operationName":"send_email","processID":"p1","references":[{"refType":"CHILD_OF","spanID":"739cd04d718779ae","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"032bf7007e123e8d","startTime":1750044449769690,"tags":[{"key":"span.kind","type":"string","value":"internal"},{"key":"otel.scope.
name","type":"string","value":"email"},{"key":"error","type":"string","value":"unset"},{"key":"app.email.recipient","type":"string","value":"reed@example.com"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":3339,"logs":[{"timestamp":1750044449717803,"fields":[{"key":"event","type":"string","value":"Received get quote request, processing it"}]},{"timestamp":1750044449718100,"fields":[{"key":"event","type":"string","value":"Quote processed, response sent back"},{"key":"app.quote.cost.total","type":"string","value":"227.5"}]}],"operationName":"{closure}","processID":"p2","references":[{"refType":"CHILD_OF","spanID":"aaf29afb62662d95","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"ea80042fbe6e5887","startTime":1750044449717692,"tags":[{"key":"span.kind","type":"string","value":"internal"},{"key":"otel.scope.name","type":"string","value":"io.opentelemetry.contrib.php.slim"},{"key":"code.file.path","type":"string","value":"/var/www/vendor/php-di/slim-bridge/src/ControllerInvoker.php"},{"key":"code.function.name","type":"string","value":"DI\\Bridge\\Slim\\ControllerInvoker::__invoke"},{"key":"code.line.number","type":"string","value":"29"},{"key":"error","type":"string","value":"unset"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":6544,"logs":[],"operationName":"POST 
/getquote","processID":"p2","references":[{"refType":"CHILD_OF","spanID":"09b03b9b5481c29c","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"aaf29afb62662d95","startTime":1750044449717102,"tags":[{"key":"span.kind","type":"string","value":"server"},{"key":"otel.scope.name","type":"string","value":"io.opentelemetry.contrib.php.slim"},{"key":"code.file.path","type":"string","value":"/var/www/vendor/slim/slim/Slim/App.php"},{"key":"code.function.name","type":"string","value":"Slim\\App::handle"},{"key":"code.line.number","type":"string","value":"207"},{"key":"http.request.body.size","type":"string","value":"19"},{"key":"http.request.method","type":"string","value":"POST"},{"key":"http.response.body.size","type":"string","value":"-"},{"key":"http.response.status_code","type":"string","value":"200"},{"key":"http.route","type":"string","value":"/getquote"},{"key":"network.protocol.version","type":"string","value":"1.1"},{"key":"server.address","type":"string","value":"quote"},{"key":"server.port","type":"string","value":"8090"},{"key":"url.full","type":"string","value":"http://quote:8090/getquote"},{"key":"url.path","type":"string","value":"/getquote"},{"key":"url.scheme","type":"string","value":"http"},{"key":"user_agent.original","type":"string","value":"-"},{"key":"error","type":"string","value":"unset"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":77220,"logs":[],"operationName":"executing api route (pages) /api/checkout","processID":"p3","references":[{"refType":"CHILD_OF","spanID":"01468af9419620f5","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"6b73da57ebca1b82","startTime":1750044449702000,"tags":[{"key":"span.kind","type":"string","value":"internal"},{"key":"otel.scope.name","type":"string","value":"next.js"},{"key":"otel.scope.version","type":"string","value":"0.0.1"},{"key":"http.status_code","type":"string","value":"200"},{"key":"next.span_name","type":"string","value":"executing api route (pages) 
/api/checkout"},{"key":"next.span_type","type":"string","value":"Node.runHandler"},{"key":"error","type":"string","value":"unset"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":78153,"logs":[],"operationName":"POST","processID":"p3","references":[{"refType":"CHILD_OF","spanID":"df1b3d5c8e0ab6be","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"47c48aa63a0c5a3d","startTime":1750044449701000,"tags":[{"key":"span.kind","type":"string","value":"server"},{"key":"otel.scope.name","type":"string","value":"@opentelemetry/instrumentation-http"},{"key":"otel.scope.version","type":"string","value":"0.57.1"},{"key":"http.flavor","type":"string","value":"1.1"},{"key":"http.host","type":"string","value":"frontend-proxy:8080"},{"key":"http.method","type":"string","value":"POST"},{"key":"http.scheme","type":"string","value":"http"},{"key":"http.status_code","type":"string","value":"200"},{"key":"http.user_agent","type":"string","value":"python-requests/2.32.4"},{"key":"net.host.name","type":"string","value":"frontend-proxy"},{"key":"net.peer.ip","type":"string","value":"172.18.0.26"},{"key":"net.transport","type":"string","value":"ip_tcp"},{"key":"error","type":"string","value":"unset"},{"key":"http.request_content_length_uncompressed","type":"string","value":"388"},{"key":"http.status_text","type":"string","value":"OK"},{"key":"http.target","type":"string","value":"/api/checkout"},{"key":"http.url","type":"string","value":"http://frontend-proxy:8080/api/checkout"},{"key":"net.host.ip","type":"string","value":"172.18.0.24"},{"key":"net.host.port","type":"string","value":"8080"},{"key":"net.peer.port","type":"string","value":"35632"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":1988,"logs":[],"operationName":"charge","processID":"p4","references":[{"refType":"CHILD_OF","spanID":"df89f1712cb9fdec","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"f30e92001c694787","startTime":1750044449743000,"tags":[{"key":"sp
an.kind","type":"string","value":"internal"},{"key":"otel.scope.name","type":"string","value":"payment"},{"key":"app.payment.card_type","type":"string","value":"visa"},{"key":"app.payment.card_valid","type":"string","value":"true"},{"key":"app.payment.charged","type":"string","value":"false"},{"key":"error","type":"string","value":"unset"},{"key":"app.loyalty.level","type":"string","value":"silver"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":6,"logs":[],"operationName":"resolveBoolean","processID":"p5","references":[{"refType":"CHILD_OF","spanID":"3af2ca071042ef47","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"ab8c870e76bbe57f","startTime":1750044449753032,"tags":[{"key":"error","type":"string","value":"unset"},{"key":"span.kind","type":"string","value":"internal"},{"key":"otel.scope.name","type":"string","value":"jsonEvaluator"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":70,"logs":[],"operationName":"resolveBoolean","processID":"p5","references":[{"refType":"CHILD_OF","spanID":"9d054ff4aeb2b518","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"3af2ca071042ef47","startTime":1750044449753027,"tags":[{"key":"error","type":"string","value":"unset"},{"key":"span.kind","type":"string","value":"server"},{"key":"otel.scope.name","type":"string","value":"flagd.evaluation.v1"},{"key":"feature_flag.key","type":"string","value":"cartFailure"},{"key":"feature_flag.provider_name","type":"string","value":"flagd"},{"key":"feature_flag.variant","type":"string","value":"off"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":19817,"logs":[{"timestamp":1750044449735392,"fields":[{"key":"event","type":"string","value":"Received 
Quote"},{"key":"app.shipping.cost.total","type":"string","value":"227.50"}]}],"operationName":"/get-quote","processID":"p6","references":[{"refType":"CHILD_OF","spanID":"7b92ebafc9a2a0f1","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"599cbbf8e81ddaca","startTime":1750044449715635,"tags":[{"key":"span.kind","type":"string","value":"server"},{"key":"otel.scope.name","type":"string","value":"opentelemetry-instrumentation-actix-web"},{"key":"otel.scope.version","type":"string","value":"0.22.0"},{"key":"client.address","type":"string","value":"172.18.0.23"},{"key":"http.request.method","type":"string","value":"POST"},{"key":"http.response.status_code","type":"string","value":"200"},{"key":"http.route","type":"string","value":"/get-quote"},{"key":"network.protocol.version","type":"string","value":"1.1"},{"key":"server.address","type":"string","value":"shipping"},{"key":"server.port","type":"string","value":"50050"},{"key":"url.path","type":"string","value":"/get-quote"},{"key":"url.scheme","type":"string","value":"http"},{"key":"user_agent.original","type":"string","value":"Go-http-client/1.1"},{"key":"error","type":"string","value":"unset"},{"key":"app.shipping.cost.total","type":"string","value":"227.50"},{"key":"messaging.message.body.size","type":"string","value":"182"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":283,"logs":[],"operationName":"sinatra.render_template","processID":"p1","references":[{"refType":"CHILD_OF","spanID":"1fd5f529c2dd316b","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"bc5f262c2f7d9bb5","startTime":1750044449770317,"tags":[{"key":"span.kind","type":"string","value":"internal"},{"key":"otel.scope.name","type":"string","value":"OpenTelemetry::Instrumentation::Sinatra"},{"key":"otel.scope.version","type":"string","value":"0.25.0"},{"key":"error","type":"string","value":"unset"},{"key":"sinatra.template_name","type":"string","value":"layout"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warning
s":null},{"duration":961,"logs":[],"operationName":"sinatra.render_template","processID":"p1","references":[{"refType":"CHILD_OF","spanID":"032bf7007e123e8d","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"1fd5f529c2dd316b","startTime":1750044449769761,"tags":[{"key":"span.kind","type":"string","value":"internal"},{"key":"otel.scope.name","type":"string","value":"OpenTelemetry::Instrumentation::Sinatra"},{"key":"otel.scope.version","type":"string","value":"0.25.0"},{"key":"error","type":"string","value":"unset"},{"key":"sinatra.template_name","type":"string","value":"confirmation"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":6755,"logs":[],"operationName":"oteldemo.PaymentService/Charge","processID":"p7","references":[{"refType":"CHILD_OF","spanID":"7683762fa74ffd1c","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"530667cc212dd6ed","startTime":1750044449739280,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"otel.scope.name","type":"string","value":"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"},{"key":"otel.scope.version","type":"string","value":"0.61.0"},{"key":"rpc.grpc.status_code","type":"string","value":"0"},{"key":"rpc.method","type":"string","value":"Charge"},{"key":"rpc.service","type":"string","value":"oteldemo.PaymentService"},{"key":"rpc.system","type":"string","value":"grpc"},{"key":"server.address","type":"string","value":"172.18.0.14"},{"key":"server.port","type":"string","value":"50051"},{"key":"error","type":"string","value":"unset"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":1831,"logs":[],"operationName":"oteldemo.CartService/GetCart","processID":"p7","references":[{"refType":"CHILD_OF","spanID":"96f2298052cc3fda","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"111cb151fdd9a915","startTime":1750044449708652,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"otel.scope.name","type":"string","
value":"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"},{"key":"otel.scope.version","type":"string","value":"0.61.0"},{"key":"rpc.grpc.status_code","type":"string","value":"0"},{"key":"rpc.method","type":"string","value":"GetCart"},{"key":"rpc.service","type":"string","value":"oteldemo.CartService"},{"key":"rpc.system","type":"string","value":"grpc"},{"key":"server.address","type":"string","value":"172.18.0.10"},{"key":"server.port","type":"string","value":"7070"},{"key":"error","type":"string","value":"unset"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":46,"logs":[],"operationName":"/ship-order","processID":"p6","references":[{"refType":"CHILD_OF","spanID":"92345ad5d7cb4190","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"d1253691f90f5b95","startTime":1750044449746781,"tags":[{"key":"span.kind","type":"string","value":"server"},{"key":"otel.scope.name","type":"string","value":"opentelemetry-instrumentation-actix-web"},{"key":"otel.scope.version","type":"string","value":"0.22.0"},{"key":"client.address","type":"string","value":"172.18.0.23"},{"key":"http.request.method","type":"string","value":"POST"},{"key":"http.response.status_code","type":"string","value":"200"},{"key":"http.route","type":"string","value":"/ship-order"},{"key":"network.protocol.version","type":"string","value":"1.1"},{"key":"server.address","type":"string","value":"shipping"},{"key":"server.port","type":"string","value":"50050"},{"key":"url.path","type":"string","value":"/ship-order"},{"key":"url.scheme","type":"string","value":"http"},{"key":"user_agent.original","type":"string","value":"Go-http-client/1.1"},{"key":"error","type":"string","value":"unset"},{"key":"messaging.message.body.size","type":"string","value":"182"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":128,"logs":[{"timestamp":1750044449717887,"fields":[{"key":"event","type":"string","value":"Calculating 
quote"}]},{"timestamp":1750044449717919,"fields":[{"key":"event","type":"string","value":"Quote calculated, returning its value"}]}],"operationName":"calculate-quote","processID":"p2","references":[{"refType":"CHILD_OF","spanID":"ea80042fbe6e5887","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"0b119b964828c67b","startTime":1750044449717886,"tags":[{"key":"span.kind","type":"string","value":"internal"},{"key":"otel.scope.name","type":"string","value":"manual-instrumentation"},{"key":"error","type":"string","value":"unset"},{"key":"app.quote.cost.total","type":"string","value":"227.5"},{"key":"app.quote.items.count","type":"string","value":"5"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":78545,"logs":[],"operationName":"router frontend egress","processID":"p8","references":[{"refType":"CHILD_OF","spanID":"d66da216bedd159f","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"df1b3d5c8e0ab6be","startTime":1750044449701376,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"component","type":"string","value":"proxy"},{"key":"http.protocol","type":"string","value":"HTTP/1.1"},{"key":"peer.address","type":"string","value":"172.18.0.24:8080"},{"key":"upstream_address","type":"string","value":"172.18.0.24:8080"},{"key":"upstream_cluster","type":"string","value":"frontend"},{"key":"upstream_cluster.name","type":"string","value":"frontend"},{"key":"error","type":"string","value":"unset"},{"key":"http.status_code","type":"string","value":"200"},{"key":"response_flags","type":"string","value":"-"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":915,"logs":[{"timestamp":1750044449709335,"fields":[{"key":"event","type":"string","value":"Fetch cart"}]}],"operationName":"POST 
/oteldemo.CartService/GetCart","processID":"p9","references":[{"refType":"CHILD_OF","spanID":"111cb151fdd9a915","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"fefa4832f9254043","startTime":1750044449709238,"tags":[{"key":"span.kind","type":"string","value":"server"},{"key":"otel.scope.name","type":"string","value":"Microsoft.AspNetCore"},{"key":"grpc.method","type":"string","value":"/oteldemo.CartService/GetCart"},{"key":"grpc.status_code","type":"string","value":"0"},{"key":"http.request.method","type":"string","value":"POST"},{"key":"http.response.status_code","type":"string","value":"200"},{"key":"http.route","type":"string","value":"/oteldemo.CartService/GetCart"},{"key":"network.protocol.version","type":"string","value":"2"},{"key":"server.address","type":"string","value":"cart"},{"key":"server.port","type":"string","value":"7070"},{"key":"url.path","type":"string","value":"/oteldemo.CartService/GetCart"},{"key":"url.scheme","type":"string","value":"http"},{"key":"error","type":"string","value":"unset"},{"key":"app.cart.items.count","type":"string","value":"5"},{"key":"app.user.id","type":"string","value":"d526648e-4a61-11f0-8b6b-b20e5443dfb5"},{"key":"user_agent.original","type":"string","value":"grpc-go/1.72.2"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":710,"logs":[],"operationName":"oteldemo.ProductCatalogService/GetProduct","processID":"p7","references":[{"refType":"CHILD_OF","spanID":"96f2298052cc3fda","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"7e5e7c2f1ea9cb0b","startTime":1750044449710565,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"otel.scope.name","type":"string","value":"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"},{"key":"otel.scope.version","type":"string","value":"0.61.0"},{"key":"rpc.grpc.status_code","type":"string","value":"0"},{"key":"rpc.method","type":"string","value":"GetProduct"},{"key":"rpc.service","type":"string","value":"ote
ldemo.ProductCatalogService"},{"key":"rpc.system","type":"string","value":"grpc"},{"key":"server.address","type":"string","value":"172.18.0.19"},{"key":"server.port","type":"string","value":"3550"},{"key":"error","type":"string","value":"unset"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":69871,"logs":[{"timestamp":1750044449737830,"fields":[{"key":"event","type":"string","value":"prepared"}]},{"timestamp":1750044449739261,"fields":[{"key":"feature_flag.key","type":"string","value":"paymentUnreachable"},{"key":"feature_flag.provider_name","type":"string","value":"flagd"},{"key":"feature_flag.variant","type":"string","value":"off"},{"key":"event","type":"string","value":"feature_flag"}]},{"timestamp":1750044449746517,"fields":[{"key":"event","type":"string","value":"charged"},{"key":"app.payment.transaction.id","type":"string","value":"bbf912fe-0a55-4704-8eb9-02d43f60297d"}]},{"timestamp":1750044449746988,"fields":[{"key":"event","type":"string","value":"shipped"},{"key":"app.shipping.tracking.id","type":"string","value":"4668b5f9-17e2-4311-8b20-c7cf3b08ab39"}]},{"timestamp":1750044449776318,"fields":[{"key":"feature_flag.key","type":"string","value":"kafkaQueueProblems"},{"key":"feature_flag.provider_name","type":"string","value":"flagd"},{"key":"feature_flag.variant","type":"string","value":"off"},{"key":"event","type":"string","value":"feature_flag"}]}],"operationName":"oteldemo.CheckoutService/PlaceOrder","processID":"p7","references":[{"refType":"CHILD_OF","spanID":"b1cf4a62984b9984","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"7683762fa74ffd1c","startTime":1750044449706551,"tags":[{"key":"span.kind","type":"string","value":"server"},{"key":"otel.scope.name","type":"string","value":"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"},{"key":"otel.scope.version","type":"string","value":"0.61.0"},{"key":"app.order.items.count","type":"string","value":"1"},{"key":"app.user.currency","type":"string
","value":"USD"},{"key":"rpc.grpc.status_code","type":"string","value":"0"},{"key":"rpc.method","type":"string","value":"PlaceOrder"},{"key":"rpc.service","type":"string","value":"oteldemo.CheckoutService"},{"key":"rpc.system","type":"string","value":"grpc"},{"key":"server.address","type":"string","value":"172.18.0.24"},{"key":"server.port","type":"string","value":"38682"},{"key":"error","type":"string","value":"unset"},{"key":"app.order.amount","type":"string","value":"1102"},{"key":"app.order.id","type":"string","value":"d52a1b43-4a61-11f0-9e2b-96226e8767f9"},{"key":"app.shipping.amount","type":"string","value":"227"},{"key":"app.shipping.tracking.id","type":"string","value":"4668b5f9-17e2-4311-8b20-c7cf3b08ab39"},{"key":"app.user.id","type":"string","value":"d526648e-4a61-11f0-8b6b-b20e5443dfb5"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":8349,"logs":[],"operationName":"POST /send_order_confirmation","processID":"p1","references":[{"refType":"CHILD_OF","spanID":"d96adf1246ad7d75","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"739cd04d718779ae","startTime":1750044449766969,"tags":[{"key":"span.kind","type":"string","value":"server"},{"key":"otel.scope.name","type":"string","value":"OpenTelemetry::Instrumentation::Rack"},{"key":"otel.scope.version","type":"string","value":"0.26.0"},{"key":"http.host","type":"string","value":"email:6060"},{"key":"http.method","type":"string","value":"POST"},{"key":"http.route","type":"string","value":"/send_order_confirmation"},{"key":"http.scheme","type":"string","value":"http"},{"key":"http.status_code","type":"string","value":"200"},{"key":"http.target","type":"string","value":"/send_order_confirmation"},{"key":"http.user_agent","type":"string","value":"Go-http-client/1.1"},{"key":"error","type":"string","value":"unset"},{"key":"app.order.id","type":"string","value":"d52a1b43-4a61-11f0-9e2b-96226e8767f9"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":74743,"lo
gs":[],"operationName":"grpc.oteldemo.CheckoutService/PlaceOrder","processID":"p3","references":[{"refType":"CHILD_OF","spanID":"6b73da57ebca1b82","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"b1cf4a62984b9984","startTime":1750044449702000,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"otel.scope.name","type":"string","value":"@opentelemetry/instrumentation-grpc"},{"key":"otel.scope.version","type":"string","value":"0.57.1"},{"key":"net.peer.name","type":"string","value":"checkout"},{"key":"net.peer.port","type":"string","value":"5050"},{"key":"rpc.grpc.status_code","type":"string","value":"0"},{"key":"rpc.method","type":"string","value":"PlaceOrder"},{"key":"rpc.service","type":"string","value":"oteldemo.CheckoutService"},{"key":"rpc.system","type":"string","value":"grpc"},{"key":"error","type":"string","value":"unset"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":12631,"logs":[],"operationName":"oteldemo.CartService/EmptyCart","processID":"p7","references":[{"refType":"CHILD_OF","spanID":"7683762fa74ffd1c","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"4e08d386db6de0e6","startTime":1750044449747019,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"otel.scope.name","type":"string","value":"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"},{"key":"otel.scope.version","type":"string","value":"0.61.0"},{"key":"rpc.grpc.status_code","type":"string","value":"0"},{"key":"rpc.method","type":"string","value":"EmptyCart"},{"key":"rpc.service","type":"string","value":"oteldemo.CartService"},{"key":"rpc.system","type":"string","value":"grpc"},{"key":"server.address","type":"string","value":"172.18.0.10"},{"key":"server.port","type":"string","value":"7070"},{"key":"error","type":"string","value":"unset"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":11927,"logs":[{"timestamp":1750044449747830,"fields":[{"key":"event","type":"strin
g","value":"Empty cart"}]},{"timestamp":1750044449755100,"fields":[{"key":"feature_flag.key","type":"string","value":"cartFailure"},{"key":"feature_flag.provider_name","type":"string","value":"flagd Provider"},{"key":"feature_flag.variant","type":"string","value":"off"},{"key":"event","type":"string","value":"feature_flag"}]}],"operationName":"POST /oteldemo.CartService/EmptyCart","processID":"p9","references":[{"refType":"CHILD_OF","spanID":"4e08d386db6de0e6","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"d8802687844ff0da","startTime":1750044449747360,"tags":[{"key":"span.kind","type":"string","value":"server"},{"key":"otel.scope.name","type":"string","value":"Microsoft.AspNetCore"},{"key":"app.user.id","type":"string","value":"d526648e-4a61-11f0-8b6b-b20e5443dfb5"},{"key":"feature_flag.key","type":"string","value":"cartFailure"},{"key":"feature_flag.provider_name","type":"string","value":"flagd Provider"},{"key":"feature_flag.variant","type":"string","value":"off"},{"key":"grpc.method","type":"string","value":"/oteldemo.CartService/EmptyCart"},{"key":"grpc.status_code","type":"string","value":"0"},{"key":"http.request.method","type":"string","value":"POST"},{"key":"http.response.status_code","type":"string","value":"200"},{"key":"http.route","type":"string","value":"/oteldemo.CartService/EmptyCart"},{"key":"network.protocol.version","type":"string","value":"2"},{"key":"server.address","type":"string","value":"cart"},{"key":"server.port","type":"string","value":"7070"},{"key":"url.path","type":"string","value":"/oteldemo.CartService/EmptyCart"},{"key":"url.scheme","type":"string","value":"http"},{"key":"user_agent.original","type":"string","value":"grpc-go/1.72.2"},{"key":"error","type":"string","value":"unset"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":1733,"logs":[],"operationName":"grpc.oteldemo.ProductCatalogService/GetProduct","processID":"p3","references":[{"refType":"CHILD_OF","spanID":"6b73da57ebca1b82","traceID":"
9e06226196051d9c3c10dfab343791ad"}],"spanID":"394722a3d65e5bee","startTime":1750044449777000,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"otel.scope.name","type":"string","value":"@opentelemetry/instrumentation-grpc"},{"key":"otel.scope.version","type":"string","value":"0.57.1"},{"key":"net.peer.name","type":"string","value":"product-catalog"},{"key":"net.peer.port","type":"string","value":"3550"},{"key":"rpc.grpc.status_code","type":"string","value":"0"},{"key":"rpc.method","type":"string","value":"GetProduct"},{"key":"rpc.service","type":"string","value":"oteldemo.ProductCatalogService"},{"key":"rpc.system","type":"string","value":"grpc"},{"key":"error","type":"string","value":"unset"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":30309,"logs":[],"operationName":"prepareOrderItemsAndShippingQuoteFromCart","processID":"p7","references":[{"refType":"CHILD_OF","spanID":"7683762fa74ffd1c","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"96f2298052cc3fda","startTime":1750044449707511,"tags":[{"key":"span.kind","type":"string","value":"internal"},{"key":"otel.scope.name","type":"string","value":"checkout"},{"key":"app.order.items.count","type":"string","value":"1"},{"key":"error","type":"string","value":"unset"},{"key":"app.cart.items.count","type":"string","value":"5"},{"key":"app.shipping.amount","type":"string","value":"227"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":805,"logs":[],"operationName":"orders 
publish","processID":"p7","references":[{"refType":"CHILD_OF","spanID":"7683762fa74ffd1c","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"842ad77105e18d23","startTime":1750044449775517,"tags":[{"key":"span.kind","type":"string","value":"producer"},{"key":"otel.scope.name","type":"string","value":"checkout"},{"key":"messaging.destination.name","type":"string","value":"orders"},{"key":"messaging.kafka.destination.partition","type":"string","value":"0"},{"key":"messaging.kafka.message.offset","type":"string","value":"0"},{"key":"messaging.kafka.producer.success","type":"string","value":"true"},{"key":"messaging.operation","type":"string","value":"publish"},{"key":"messaging.system","type":"string","value":"kafka"},{"key":"network.transport","type":"string","value":"tcp"},{"key":"peer.service","type":"string","value":"kafka"},{"key":"error","type":"string","value":"unset"},{"key":"messaging.kafka.producer.duration_ms","type":"string","value":"0"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":352,"logs":[{"timestamp":1750044449709386,"fields":[{"key":"event","type":"string","value":"Enqueued"}]},{"timestamp":1750044449709400,"fields":[{"key":"event","type":"string","value":"Sent"}]},{"timestamp":1750044449709718,"fields":[{"key":"event","type":"string","value":"ResponseReceived"}]}],"operationName":"HGET","processID":"p9","references":[{"refType":"CHILD_OF","spanID":"fefa4832f9254043","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"1c6fa81981e4960c","startTime":1750044449709366,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"otel.scope.name","type":"string","value":"OpenTelemetry.Instrumentation.StackExchangeRedis"},{"key":"otel.scope.version","type":"string","value":"1.11.0-beta.2"},{"key":"db.redis.database_index","type":"string","value":"0"},{"key":"db.redis.flags","type":"string","value":"None"},{"key":"db.system","type":"string","value":"redis"},{"key":"server.address","type":"string","value":"valke
y-cart"},{"key":"server.port","type":"string","value":"6379"},{"key":"error","type":"string","value":"unset"},{"key":"db.statement","type":"string","value":"HGET d526648e-4a61-11f0-8b6b-b20e5443dfb5"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":22024,"logs":[],"operationName":"HTTP POST","processID":"p7","references":[{"refType":"CHILD_OF","spanID":"96f2298052cc3fda","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"7b92ebafc9a2a0f1","startTime":1750044449713664,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"otel.scope.name","type":"string","value":"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"},{"key":"otel.scope.version","type":"string","value":"0.61.0"},{"key":"http.request.method","type":"string","value":"POST"},{"key":"http.response.status_code","type":"string","value":"200"},{"key":"network.protocol.version","type":"string","value":"1.1"},{"key":"error","type":"string","value":"unset"},{"key":"server.address","type":"string","value":"shipping"},{"key":"server.port","type":"string","value":"50050"},{"key":"url.full","type":"string","value":"http://shipping:50050/get-quote"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":391,"logs":[],"operationName":"HTTP 
POST","processID":"p7","references":[{"refType":"CHILD_OF","spanID":"7683762fa74ffd1c","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"92345ad5d7cb4190","startTime":1750044449746559,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"otel.scope.name","type":"string","value":"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"},{"key":"otel.scope.version","type":"string","value":"0.61.0"},{"key":"http.request.method","type":"string","value":"POST"},{"key":"http.response.status_code","type":"string","value":"200"},{"key":"network.protocol.version","type":"string","value":"1.1"},{"key":"error","type":"string","value":"unset"},{"key":"server.address","type":"string","value":"shipping"},{"key":"server.port","type":"string","value":"50050"},{"key":"url.full","type":"string","value":"http://shipping:50050/ship-order"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":4711,"logs":[],"operationName":"POST","processID":"p9","references":[{"refType":"CHILD_OF","spanID":"64e503f233846241","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"31d9931c1b054f86","startTime":1750044449749545,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"otel.scope.name","type":"string","value":"System.Net.Http"},{"key":"http.request.method","type":"string","value":"POST"},{"key":"http.response.status_code","type":"string","value":"200"},{"key":"network.protocol.version","type":"string","value":"2"},{"key":"server.address","type":"string","value":"flagd"},{"key":"server.port","type":"string","value":"8013"},{"key":"url.full","type":"string","value":"http://flagd:8013/flagd.evaluation.v1.Service/ResolveBoolean"},{"key":"error","type":"string","value":"unset"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":15663,"logs":[],"operationName":"HTTP 
POST","processID":"p7","references":[{"refType":"CHILD_OF","spanID":"7683762fa74ffd1c","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"d96adf1246ad7d75","startTime":1750044449759771,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"otel.scope.name","type":"string","value":"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"},{"key":"otel.scope.version","type":"string","value":"0.61.0"},{"key":"http.request.method","type":"string","value":"POST"},{"key":"http.response.status_code","type":"string","value":"200"},{"key":"network.protocol.version","type":"string","value":"1.1"},{"key":"error","type":"string","value":"unset"},{"key":"server.address","type":"string","value":"email"},{"key":"server.port","type":"string","value":"6060"},{"key":"url.full","type":"string","value":"http://email:6060/send_order_confirmation"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":3076,"logs":[],"operationName":"grpc.oteldemo.PaymentService/Charge","processID":"p4","references":[{"refType":"CHILD_OF","spanID":"530667cc212dd6ed","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"df89f1712cb9fdec","startTime":1750044449742000,"tags":[{"key":"span.kind","type":"string","value":"server"},{"key":"otel.scope.name","type":"string","value":"@opentelemetry/instrumentation-grpc"},{"key":"otel.scope.version","type":"string","value":"0.57.1"},{"key":"rpc.grpc.status_code","type":"string","value":"0"},{"key":"rpc.method","type":"string","value":"Charge"},{"key":"rpc.service","type":"string","value":"oteldemo.PaymentService"},{"key":"rpc.system","type":"string","value":"grpc"},{"key":"error","type":"string","value":"unset"},{"key":"app.payment.amount","type":"string","value":"1102.50"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":79737,"logs":[],"operationName":"POST","processID":"p10","references":[],"spanID":"10d27d153c44c541","startTime":1750044449700847,"tags":[{"key":"span.kind","type":"string"
,"value":"client"},{"key":"otel.scope.name","type":"string","value":"opentelemetry.instrumentation.requests"},{"key":"otel.scope.version","type":"string","value":"0.55b0"},{"key":"http.method","type":"string","value":"POST"},{"key":"http.status_code","type":"string","value":"200"},{"key":"http.url","type":"string","value":"http://frontend-proxy:8080/api/checkout"},{"key":"error","type":"string","value":"unset"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":421,"logs":[{"timestamp":1750044449755249,"fields":[{"key":"event","type":"string","value":"Enqueued"}]},{"timestamp":1750044449755262,"fields":[{"key":"event","type":"string","value":"Sent"}]},{"timestamp":1750044449755655,"fields":[{"key":"event","type":"string","value":"ResponseReceived"}]}],"operationName":"HMSET","processID":"p9","references":[{"refType":"CHILD_OF","spanID":"d8802687844ff0da","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"5f78a21a81d1a9a3","startTime":1750044449755233,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"otel.scope.name","type":"string","value":"OpenTelemetry.Instrumentation.StackExchangeRedis"},{"key":"otel.scope.version","type":"string","value":"1.11.0-beta.2"},{"key":"db.redis.database_index","type":"string","value":"0"},{"key":"db.redis.flags","type":"string","value":"DemandMaster"},{"key":"db.system","type":"string","value":"redis"},{"key":"server.address","type":"string","value":"valkey-cart"},{"key":"server.port","type":"string","value":"6379"},{"key":"error","type":"string","value":"unset"},{"key":"db.statement","type":"string","value":"HMSET 
d526648e-4a61-11f0-8b6b-b20e5443dfb5"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":5855,"logs":[],"operationName":"flagd.evaluation.v1.Service/ResolveBoolean","processID":"p9","references":[{"refType":"CHILD_OF","spanID":"d8802687844ff0da","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"64e503f233846241","startTime":1750044449749012,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"otel.scope.name","type":"string","value":"OpenTelemetry.Instrumentation.GrpcNetClient"},{"key":"otel.scope.version","type":"string","value":"1.11.0-beta.2"},{"key":"rpc.grpc.status_code","type":"string","value":"0"},{"key":"rpc.method","type":"string","value":"ResolveBoolean"},{"key":"rpc.service","type":"string","value":"flagd.evaluation.v1.Service"},{"key":"rpc.system","type":"string","value":"grpc"},{"key":"server.address","type":"string","value":"flagd"},{"key":"server.port","type":"string","value":"8013"},{"key":"error","type":"string","value":"unset"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":136,"logs":[{"timestamp":1750044449752991,"fields":[{"key":"message.id","type":"string","value":"1"},{"key":"message.type","type":"string","value":"RECEIVED"},{"key":"event","type":"string","value":"message"},{"key":"message.uncompressed_size","type":"string","value":"15"}]},{"timestamp":1750044449753111,"fields":[{"key":"message.id","type":"string","value":"1"},{"key":"message.type","type":"string","value":"SENT"},{"key":"message.uncompressed_size","type":"string","value":"15"},{"key":"event","type":"string","value":"message"}]}],"operationName":"flagd.evaluation.v1.Service/ResolveBoolean","processID":"p5","references":[{"refType":"CHILD_OF","spanID":"31d9931c1b054f86","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"9d054ff4aeb2b518","startTime":1750044449752984,"tags":[{"key":"span.kind","type":"string","value":"server"},{"key":"otel.scope.name","type":"string","value":"connectrpc.com/otelconn
ect"},{"key":"otel.scope.version","type":"string","value":"semver:0.6.0-dev"},{"key":"rpc.method","type":"string","value":"ResolveBoolean"},{"key":"rpc.service","type":"string","value":"flagd.evaluation.v1.Service"},{"key":"error","type":"string","value":"unset"},{"key":"net.peer.name","type":"string","value":"172.18.0.10"},{"key":"net.peer.port","type":"string","value":"46838"},{"key":"rpc.grpc.status_code","type":"string","value":"0"},{"key":"rpc.system","type":"string","value":"grpc"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":877,"logs":[{"timestamp":1750044449755696,"fields":[{"key":"event","type":"string","value":"Enqueued"}]},{"timestamp":1750044449755708,"fields":[{"key":"event","type":"string","value":"Sent"}]},{"timestamp":1750044449756563,"fields":[{"key":"event","type":"string","value":"ResponseReceived"}]}],"operationName":"EXPIRE","processID":"p9","references":[{"refType":"CHILD_OF","spanID":"d8802687844ff0da","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"4a42b7a5fa81bdfb","startTime":1750044449755686,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"otel.scope.name","type":"string","value":"OpenTelemetry.Instrumentation.StackExchangeRedis"},{"key":"otel.scope.version","type":"string","value":"1.11.0-beta.2"},{"key":"db.redis.database_index","type":"string","value":"0"},{"key":"db.redis.flags","type":"string","value":"DemandMaster"},{"key":"db.system","type":"string","value":"redis"},{"key":"server.address","type":"string","value":"valkey-cart"},{"key":"server.port","type":"string","value":"6379"},{"key":"error","type":"string","value":"unset"},{"key":"db.statement","type":"string","value":"EXPIRE 
d526648e-4a61-11f0-8b6b-b20e5443dfb5"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":2157,"logs":[],"operationName":"oteldemo.CurrencyService/Convert","processID":"p7","references":[{"refType":"CHILD_OF","spanID":"96f2298052cc3fda","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"34a9d7aa3afe1688","startTime":1750044449711310,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"otel.scope.name","type":"string","value":"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"},{"key":"otel.scope.version","type":"string","value":"0.61.0"},{"key":"rpc.grpc.status_code","type":"string","value":"0"},{"key":"rpc.method","type":"string","value":"Convert"},{"key":"rpc.service","type":"string","value":"oteldemo.CurrencyService"},{"key":"rpc.system","type":"string","value":"grpc"},{"key":"server.address","type":"string","value":"172.18.0.18"},{"key":"server.port","type":"string","value":"7001"},{"key":"error","type":"string","value":"unset"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":2021,"logs":[],"operationName":"oteldemo.CurrencyService/Convert","processID":"p7","references":[{"refType":"CHILD_OF","spanID":"96f2298052cc3fda","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"11295d69d0e661dd","startTime":1750044449735781,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"otel.scope.name","type":"string","value":"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"},{"key":"otel.scope.version","type":"string","value":"0.61.0"},{"key":"rpc.grpc.status_code","type":"string","value":"0"},{"key":"rpc.method","type":"string","value":"Convert"},{"key":"rpc.service","type":"string","value":"oteldemo.CurrencyService"},{"key":"rpc.system","type":"string","value":"grpc"},{"key":"server.address","type":"string","value":"172.18.0.18"},{"key":"server.port","type":"string","value":"7001"},{"key":"error","type":"string","value":"unset"}],
"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":77796,"logs":[],"operationName":"POST /api/checkout","processID":"p3","references":[{"refType":"CHILD_OF","spanID":"47c48aa63a0c5a3d","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"01468af9419620f5","startTime":1750044449701000,"tags":[{"key":"span.kind","type":"string","value":"server"},{"key":"otel.scope.name","type":"string","value":"next.js"},{"key":"otel.scope.version","type":"string","value":"0.0.1"},{"key":"http.method","type":"string","value":"POST"},{"key":"http.status_code","type":"string","value":"200"},{"key":"http.target","type":"string","value":"/api/checkout"},{"key":"next.rsc","type":"string","value":"false"},{"key":"next.span_name","type":"string","value":"POST /api/checkout"},{"key":"next.span_type","type":"string","value":"BaseServer.handleRequest"},{"key":"error","type":"string","value":"unset"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":19397,"logs":[],"operationName":"POST quote","processID":"p6","references":[{"refType":"CHILD_OF","spanID":"599cbbf8e81ddaca","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"09b03b9b5481c29c","startTime":1750044449715774,"tags":[{"key":"span.kind","type":"string","value":"client"},{"key":"otel.scope.name","type":"string","value":"opentelemetry-instrumentation-actix-web"},{"key":"otel.scope.version","type":"string","value":"0.22.0"},{"key":"http.request.method","type":"string","value":"POST"},{"key":"http.response.status_code","type":"string","value":"200"},{"key":"server.address","type":"string","value":"quote"},{"key":"server.port","type":"string","value":"8090"},{"key":"url.full","type":"string","value":"http://quote:8090/getquote"},{"key":"error","type":"string","value":"unset"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":75,"logs":[{"timestamp":1750044449711020,"fields":[{"key":"event","type":"string","value":"Product 
Found"}]}],"operationName":"oteldemo.ProductCatalogService/GetProduct","processID":"p11","references":[{"refType":"CHILD_OF","spanID":"7e5e7c2f1ea9cb0b","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"5b997902f830009b","startTime":1750044449710969,"tags":[{"key":"span.kind","type":"string","value":"server"},{"key":"otel.scope.name","type":"string","value":"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"},{"key":"otel.scope.version","type":"string","value":"0.61.0"},{"key":"rpc.grpc.status_code","type":"string","value":"0"},{"key":"rpc.method","type":"string","value":"GetProduct"},{"key":"rpc.service","type":"string","value":"oteldemo.ProductCatalogService"},{"key":"rpc.system","type":"string","value":"grpc"},{"key":"error","type":"string","value":"unset"},{"key":"app.product.id","type":"string","value":"0PUK6V6EV0"},{"key":"app.product.name","type":"string","value":"Solar System Color Imager"},{"key":"server.address","type":"string","value":"172.18.0.23"},{"key":"server.port","type":"string","value":"56058"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":78,"logs":[{"timestamp":1750044449778775,"fields":[{"key":"event","type":"string","value":"Product 
Found"}]}],"operationName":"oteldemo.ProductCatalogService/GetProduct","processID":"p11","references":[{"refType":"CHILD_OF","spanID":"394722a3d65e5bee","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"212f00429ff724f5","startTime":1750044449778734,"tags":[{"key":"span.kind","type":"string","value":"server"},{"key":"otel.scope.name","type":"string","value":"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"},{"key":"otel.scope.version","type":"string","value":"0.61.0"},{"key":"rpc.grpc.status_code","type":"string","value":"0"},{"key":"rpc.method","type":"string","value":"GetProduct"},{"key":"rpc.service","type":"string","value":"oteldemo.ProductCatalogService"},{"key":"rpc.system","type":"string","value":"grpc"},{"key":"error","type":"string","value":"unset"},{"key":"app.product.id","type":"string","value":"0PUK6V6EV0"},{"key":"app.product.name","type":"string","value":"Solar System Color Imager"},{"key":"server.address","type":"string","value":"172.18.0.24"},{"key":"server.port","type":"string","value":"47538"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":597,"logs":[{"timestamp":1750044449711719,"fields":[{"key":"event","type":"string","value":"Processing currency conversion request"}]},{"timestamp":1750044449711741,"fields":[{"key":"event","type":"string","value":"Conversion successful, response sent 
back"}]}],"operationName":"Currency/Convert","processID":"p12","references":[{"refType":"CHILD_OF","spanID":"34a9d7aa3afe1688","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"42e4324fcb045b99","startTime":1750044449711715,"tags":[{"key":"span.kind","type":"string","value":"server"},{"key":"otel.scope.name","type":"string","value":"currency"},{"key":"app.currency.conversion.from","type":"string","value":"USD"},{"key":"rpc.grpc.status_code","type":"string","value":"0"},{"key":"rpc.method","type":"string","value":"Convert"},{"key":"rpc.service","type":"string","value":"oteldemo.CurrencyService"},{"key":"rpc.system","type":"string","value":"grpc"},{"key":"error","type":"string","value":"false"},{"key":"app.currency.conversion.to","type":"string","value":"USD"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":655,"logs":[{"timestamp":1750044449736390,"fields":[{"key":"event","type":"string","value":"Processing currency conversion request"}]},{"timestamp":1750044449736414,"fields":[{"key":"event","type":"string","value":"Conversion successful, response sent 
back"}]}],"operationName":"Currency/Convert","processID":"p12","references":[{"refType":"CHILD_OF","spanID":"11295d69d0e661dd","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"adb556f3c99b633d","startTime":1750044449736386,"tags":[{"key":"span.kind","type":"string","value":"server"},{"key":"otel.scope.name","type":"string","value":"currency"},{"key":"app.currency.conversion.from","type":"string","value":"USD"},{"key":"rpc.grpc.status_code","type":"string","value":"0"},{"key":"rpc.method","type":"string","value":"Convert"},{"key":"rpc.service","type":"string","value":"oteldemo.CurrencyService"},{"key":"rpc.system","type":"string","value":"grpc"},{"key":"error","type":"string","value":"false"},{"key":"app.currency.conversion.to","type":"string","value":"USD"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null},{"duration":78648,"logs":[],"operationName":"ingress","processID":"p8","references":[{"refType":"CHILD_OF","spanID":"10d27d153c44c541","traceID":"9e06226196051d9c3c10dfab343791ad"}],"spanID":"d66da216bedd159f","startTime":1750044449701298,"tags":[{"key":"span.kind","type":"string","value":"server"},{"key":"component","type":"string","value":"proxy"},{"key":"downstream_cluster","type":"string","value":"-"},{"key":"http.protocol","type":"string","value":"HTTP/1.1"},{"key":"node_id","type":"string","value":"-"},{"key":"peer.address","type":"string","value":"172.18.0.25"},{"key":"zone","type":"string","value":"-"},{"key":"guid:x-request-id","type":"string","value":"347edd6d-e273-953e-87f6-7ba07f352331"},{"key":"http.method","type":"string","value":"POST"},{"key":"http.status_code","type":"string","value":"200"},{"key":"http.url","type":"string","value":"http://frontend-proxy:8080/api/checkout"},{"key":"request_size","type":"string","value":"388"},{"key":"response_flags","type":"string","value":"-"},{"key":"response_size","type":"string","value":"857"},{"key":"upstream_cluster","type":"string","value":"frontend"},{"key":"upstream_cluster.name","typ
e":"string","value":"frontend"},{"key":"user_agent","type":"string","value":"python-requests/2.32.4"},{"key":"error","type":"string","value":"unset"}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null}],"traceID":"9e06226196051d9c3c10dfab343791ad","warnings":null}],"errors":null,"limit":0,"offset":0,"total":1} ``` + +### Querying dependencies + +The dependency graph is available at the `/select/jaeger/api/dependencies` HTTP endpoint. +This endpoint provides the following params: +- `endTs`: the end timestamp in unix milliseconds. Current timestamp will be used if empty. +- `lookback`: the lookbehind window duration in milliseconds. Default to `1m` if empty (not controlled by `-search.traceMaxDependencyLookbehind` flag). + +By default, the `lookback` duration cannot exceed the value of the `-search.traceMaxDependencyLookbehind` flag (default `1m`), which helps prevent +heavy queries for dependency analysis. + +Here are examples of the dependency API: + +1. Show dependencies within a time range: +```sh +curl 'http://<victoria-traces>:10428/select/jaeger/api/dependencies?endTs=1758213428616&lookback=60000' +``` + +Here's a response example: +```json 
+{"data":[{"parent":"shipping","child":"quote","callCount":2},{"parent":"checkout","child":"cart","callCount":4},{"parent":"frontend-proxy","child":"frontend","callCount":1193},{"parent":"cart","child":"flagd","callCount":2},{"parent":"checkout","child":"shipping","callCount":4},{"parent":"recommendation","child":"product-catalog","callCount":68},{"parent":"frontend","child":"cart","callCount":155},{"parent":"frontend","child":"recommendation","callCount":64},{"parent":"checkout","child":"product-catalog","callCount":6},{"parent":"checkout","child":"currency","callCount":8},{"parent":"checkout","child":"payment","callCount":2},{"parent":"frontend","child":"product-catalog","callCount":350},{"parent":"load-generator","child":"frontend-proxy","callCount":32},{"parent":"frontend-proxy","child":"image-provider","callCount":591},{"parent":"frontend-proxy","child":"flagd","callCount":118},{"parent":"frontend","child":"currency","callCount":141},{"parent":"frontend-web","child":"frontend-proxy","callCount":333},{"parent":"checkout","child":"email","callCount":2},{"parent":"frontend","child":"checkout","callCount":2}],"errors": null,"limit": 0,"offset": 0,"total":19} +``` From 6005d16f7ac572d80072b9323960a05639a8608c Mon Sep 17 00:00:00 2001 From: Jiekun Date: Fri, 19 Sep 2025 00:50:46 +0800 Subject: [PATCH 07/26] feature: [dependency] add API doc --- docs/victoriatraces/querying/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/victoriatraces/querying/README.md b/docs/victoriatraces/querying/README.md index 3bbaef592..b4ed0e85f 100644 --- a/docs/victoriatraces/querying/README.md +++ b/docs/victoriatraces/querying/README.md @@ -122,6 +122,7 @@ Here's a response example: ### Querying dependencies The dependency graph is available at the `/select/jaeger/api/dependencies` HTTP endpoint. + This endpoint provides the following params: - `endTs`: the end timestamp in unix milliseconds. Current timestamp will be used if empty. 
- `lookback`: the lookbehind window duration in milliseconds. Default to `1m` if empty (not controlled by `-search.traceMaxDependencyLookbehind` flag). From 7b63c52ec7044ea4666888b168e79abe2af18efd Mon Sep 17 00:00:00 2001 From: Jiekun Date: Wed, 24 Sep 2025 22:20:28 +0800 Subject: [PATCH 08/26] feature: [dependency] temporary result for dependency APIs --- app/victoria-traces/main.go | 2 + app/vtbackground/servicegraph.go | 158 +++++++++++++++++++++++++++++ app/vtselect/traces/query/query.go | 32 +----- 3 files changed, 162 insertions(+), 30 deletions(-) create mode 100644 app/vtbackground/servicegraph.go diff --git a/app/victoria-traces/main.go b/app/victoria-traces/main.go index 2ddb7fc16..4d1e61a77 100644 --- a/app/victoria-traces/main.go +++ b/app/victoria-traces/main.go @@ -15,6 +15,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics" + "github.com/VictoriaMetrics/VictoriaTraces/app/vtbackground" "github.com/VictoriaMetrics/VictoriaTraces/app/vtinsert" "github.com/VictoriaMetrics/VictoriaTraces/app/vtinsert/insertutil" "github.com/VictoriaMetrics/VictoriaTraces/app/vtselect" @@ -48,6 +49,7 @@ func main() { insertutil.SetLogRowsStorage(&vtstorage.Storage{}) vtinsert.Init() + vtbackground.InitServiceGraph() go httpserver.Serve(listenAddrs, requestHandler, httpserver.ServeOptions{ UseProxyProtocol: useProxyProtocol, diff --git a/app/vtbackground/servicegraph.go b/app/vtbackground/servicegraph.go new file mode 100644 index 000000000..76f517216 --- /dev/null +++ b/app/vtbackground/servicegraph.go @@ -0,0 +1,158 @@ +package vtbackground + +import ( + "context" + "flag" + "fmt" + "net/http" + "strings" + "sync" + "time" + + "github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + + "github.com/VictoriaMetrics/VictoriaTraces/app/vtinsert/insertutil" + "github.com/VictoriaMetrics/VictoriaTraces/app/vtstorage" + otelpb 
"github.com/VictoriaMetrics/VictoriaTraces/lib/protoparser/opentelemetry/pb" +) + +var ( + enableServiceGraph = flag.Bool("servicegraph.enable", false, "Whether to enable background task for generating service graph.") + serviceGraphInterval = flag.Duration("servicegraph.taskInterval", time.Minute, "The background task interval for generating service graph data. It requires setting `-servicegraph.enable=true`.") + serviceGraphLookbehind = flag.Duration("servicegraph.Lookbehind", time.Minute, "The lookbehind window for each time service graph background task run. It requires setting `-servicegraph.enable=true`.") +) + +// Row represent the query result of a trace span. +type Row struct { + Fields []logstorage.Field +} + +type dependencyLink struct { + parent string + child string + callCount uint64 +} + +func InitServiceGraph() { + if !*enableServiceGraph { + return + } + ticker := time.NewTicker(*serviceGraphInterval) + go func() { + for { + select { + case <-ticker.C: + GetServiceGraphLastMin(context.TODO()) + } + } + }() +} + +func GetServiceGraphLastMin(ctx context.Context) { + qStrChildSpans := fmt.Sprintf( + `(NOT %s:"") AND (%s:~"%d|%d") | fields %s, %s | rename %s as %s, %s as child`, + otelpb.ParentSpanIDField, + otelpb.KindField, + otelpb.SpanKind(2), + otelpb.SpanKind(5), + otelpb.ParentSpanIDField, + otelpb.ResourceAttrServiceName, + otelpb.ParentSpanIDField, + otelpb.SpanIDField, + otelpb.ResourceAttrServiceName, + ) + qStrParentSpans := fmt.Sprintf( + `(NOT %s:"") AND (%s:~"%d|%d") | fields %s, %s | rename %s as parent`, + otelpb.SpanIDField, + otelpb.KindField, + otelpb.SpanKind(3), + otelpb.SpanKind(4), + otelpb.SpanIDField, + otelpb.ResourceAttrServiceName, + otelpb.ResourceAttrServiceName, + ) + qStr := fmt.Sprintf( + `%s | join by (%s) (%s) inner | NOT parent:eq_field(child) | stats by (parent, child) count() callCount`, + qStrChildSpans, + otelpb.SpanIDField, + qStrParentSpans, + ) + + endTime := time.Now().Truncate(-*serviceGraphInterval) + 
startTime := endTime.Add(-*serviceGraphLookbehind) + + q, err := logstorage.ParseQueryAtTimestamp(qStr, endTime.UnixNano()) + if err != nil { + logger.Errorf("cannot parse query [%s]: %s", qStr, err) + return + } + q.AddTimeFilter(startTime.UnixNano(), endTime.UnixNano()) + q.AddPipeOffsetLimit(0, 1000) + + qs := &logstorage.QueryStats{} + + qctx := logstorage.NewQueryContext(ctx, qs, []logstorage.TenantID{{}}, q) + + var rowsLock sync.Mutex + var rows []*Row + //var missingTimeColumn atomic.Bool + writeBlock := func(_ uint, db *logstorage.DataBlock) { + columns := db.Columns + if len(columns) == 0 { + return + } + clonedColumnNames := make([]string, len(columns)) + valuesCount := 0 + for i, c := range columns { + clonedColumnNames[i] = strings.Clone(c.Name) + if len(c.Values) > valuesCount { + valuesCount = len(c.Values) + } + } + if valuesCount == 0 { + return + } + for i := 0; i < valuesCount; i++ { + fields := make([]logstorage.Field, 0, len(columns)) + for j := range columns { + fields = append( + fields, + logstorage.Field{ + Name: clonedColumnNames[j], + Value: strings.Clone(columns[j].Values[i]), + }, + ) + } + rowsLock.Lock() + rows = append(rows, &Row{ + Fields: fields, + }) + rowsLock.Unlock() + } + } + + if err = vtstorage.RunQuery(qctx, writeBlock); err != nil { + logger.Errorf("cannot execute query [%s]: %s", qStr, err) + return + } + + if len(rows) == 0 { + return + } + + r := &http.Request{} + cp, _ := insertutil.GetCommonParams(r) + lmp := cp.NewLogMessageProcessor("background_task", false) + + for _, row := range rows { + f := append(row.Fields, logstorage.Field{ + Name: "_msg", + Value: "-", + }) + lmp.AddRow(endTime.UnixNano(), f, []logstorage.Field{{"service_graph_stream", "-"}}) + } + lmp.MustClose() + + return +} diff --git a/app/vtselect/traces/query/query.go b/app/vtselect/traces/query/query.go index c5a0cb1ec..ac0543880 100644 --- a/app/vtselect/traces/query/query.go +++ b/app/vtselect/traces/query/query.go @@ -578,43 +578,15 @@ type 
DependenciesQueryParameters struct { // GetDependencyList returns service dependencies graph edges (parent, child, callCount) in []*Row format. func GetDependencyList(ctx context.Context, cp *CommonParams, param *DependenciesQueryParameters) ([]*Row, error) { - qStrParentSpans := fmt.Sprintf( - `NOT %s:"" | fields %s, %s | rename %s as %s, %s as child`, - otelpb.ParentSpanIDField, - otelpb.ParentSpanIDField, - otelpb.ResourceAttrServiceName, - otelpb.ParentSpanIDField, - otelpb.SpanIDField, - otelpb.ResourceAttrServiceName, - ) - qStrChildSpans := fmt.Sprintf( - `NOT %s:"" | fields %s, %s | rename %s as parent`, - otelpb.SpanIDField, - otelpb.SpanIDField, - otelpb.ResourceAttrServiceName, - otelpb.ResourceAttrServiceName, - ) - qStr := fmt.Sprintf( - `%s | join by (%s) (%s) inner | NOT parent:eq_field(child) | stats by (parent, child) count() callCount`, - qStrParentSpans, - otelpb.SpanIDField, - qStrChildSpans, - ) - - if *traceMaxDependencyLookBehind > 0 && param.Lookback > *traceMaxDependencyLookBehind { - param.Lookback = *traceMaxDependencyLookBehind - } + qStr := `{service_graph_stream="-"} | fields parent, child, callCount | stats by (parent, child) sum(callCount) as callCount` startTime := param.EndTs.Add(-param.Lookback).UnixNano() endTime := param.EndTs.UnixNano() - q, err := logstorage.ParseQueryAtTimestamp(qStr, endTime) if err != nil { return nil, fmt.Errorf("cannot parse query [%s]: %s", qStr, err) } q.AddTimeFilter(startTime, endTime) - if *traceMaxDependencyList > 0 { - q.AddPipeOffsetLimit(0, *traceMaxDependencyList) - } + cp.Query = q qctx := cp.NewQueryContext(ctx) From bcc928eb77c56d125751a6a64ded4ae1dde8bcfb Mon Sep 17 00:00:00 2001 From: Jiekun Date: Fri, 26 Sep 2025 13:52:17 +0800 Subject: [PATCH 09/26] feature: [dependency] split dependency to differet apps --- app/vtbackground/servicegraph.go | 120 ++---------------- app/vtinsert/opentelemetry/opentelemetry.go | 19 +++ app/vtselect/traces/query/query.go | 92 +++++++++++++- 
.../opentelemetry/pb/trace_fields.go | 2 + 4 files changed, 118 insertions(+), 115 deletions(-) diff --git a/app/vtbackground/servicegraph.go b/app/vtbackground/servicegraph.go index 76f517216..73bcdd991 100644 --- a/app/vtbackground/servicegraph.go +++ b/app/vtbackground/servicegraph.go @@ -3,18 +3,13 @@ package vtbackground import ( "context" "flag" - "fmt" "net/http" - "strings" - "sync" "time" - "github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" - "github.com/VictoriaMetrics/VictoriaTraces/app/vtinsert/insertutil" - "github.com/VictoriaMetrics/VictoriaTraces/app/vtstorage" - otelpb "github.com/VictoriaMetrics/VictoriaTraces/lib/protoparser/opentelemetry/pb" + vtinsert "github.com/VictoriaMetrics/VictoriaTraces/app/vtinsert/opentelemetry" + vtselect "github.com/VictoriaMetrics/VictoriaTraces/app/vtselect/traces/query" ) var ( @@ -23,17 +18,6 @@ var ( serviceGraphLookbehind = flag.Duration("servicegraph.Lookbehind", time.Minute, "The lookbehind window for each time service graph background task run. It requires setting `-servicegraph.enable=true`.") ) -// Row represent the query result of a trace span. 
-type Row struct { - Fields []logstorage.Field -} - -type dependencyLink struct { - parent string - child string - callCount uint64 -} - func InitServiceGraph() { if !*enableServiceGraph { return @@ -50,109 +34,23 @@ func InitServiceGraph() { } func GetServiceGraphLastMin(ctx context.Context) { - qStrChildSpans := fmt.Sprintf( - `(NOT %s:"") AND (%s:~"%d|%d") | fields %s, %s | rename %s as %s, %s as child`, - otelpb.ParentSpanIDField, - otelpb.KindField, - otelpb.SpanKind(2), - otelpb.SpanKind(5), - otelpb.ParentSpanIDField, - otelpb.ResourceAttrServiceName, - otelpb.ParentSpanIDField, - otelpb.SpanIDField, - otelpb.ResourceAttrServiceName, - ) - qStrParentSpans := fmt.Sprintf( - `(NOT %s:"") AND (%s:~"%d|%d") | fields %s, %s | rename %s as parent`, - otelpb.SpanIDField, - otelpb.KindField, - otelpb.SpanKind(3), - otelpb.SpanKind(4), - otelpb.SpanIDField, - otelpb.ResourceAttrServiceName, - otelpb.ResourceAttrServiceName, - ) - qStr := fmt.Sprintf( - `%s | join by (%s) (%s) inner | NOT parent:eq_field(child) | stats by (parent, child) count() callCount`, - qStrChildSpans, - otelpb.SpanIDField, - qStrParentSpans, - ) + r := &http.Request{} - endTime := time.Now().Truncate(-*serviceGraphInterval) + endTime := time.Now().Truncate(*serviceGraphInterval) startTime := endTime.Add(-*serviceGraphLookbehind) - q, err := logstorage.ParseQueryAtTimestamp(qStr, endTime.UnixNano()) + rows, err := vtselect.GetServiceGraphTimeRange(ctx, r, startTime, endTime) if err != nil { - logger.Errorf("cannot parse query [%s]: %s", qStr, err) + logger.Errorf("cannot get service graph for time range [%d, %d]: %s", startTime.Unix(), endTime.Unix(), err) return } - q.AddTimeFilter(startTime.UnixNano(), endTime.UnixNano()) - q.AddPipeOffsetLimit(0, 1000) - - qs := &logstorage.QueryStats{} - - qctx := logstorage.NewQueryContext(ctx, qs, []logstorage.TenantID{{}}, q) - - var rowsLock sync.Mutex - var rows []*Row - //var missingTimeColumn atomic.Bool - writeBlock := func(_ uint, db 
*logstorage.DataBlock) { - columns := db.Columns - if len(columns) == 0 { - return - } - clonedColumnNames := make([]string, len(columns)) - valuesCount := 0 - for i, c := range columns { - clonedColumnNames[i] = strings.Clone(c.Name) - if len(c.Values) > valuesCount { - valuesCount = len(c.Values) - } - } - if valuesCount == 0 { - return - } - for i := 0; i < valuesCount; i++ { - fields := make([]logstorage.Field, 0, len(columns)) - for j := range columns { - fields = append( - fields, - logstorage.Field{ - Name: clonedColumnNames[j], - Value: strings.Clone(columns[j].Values[i]), - }, - ) - } - rowsLock.Lock() - rows = append(rows, &Row{ - Fields: fields, - }) - rowsLock.Unlock() - } - } - - if err = vtstorage.RunQuery(qctx, writeBlock); err != nil { - logger.Errorf("cannot execute query [%s]: %s", qStr, err) - return - } - if len(rows) == 0 { return } - r := &http.Request{} - cp, _ := insertutil.GetCommonParams(r) - lmp := cp.NewLogMessageProcessor("background_task", false) - - for _, row := range rows { - f := append(row.Fields, logstorage.Field{ - Name: "_msg", - Value: "-", - }) - lmp.AddRow(endTime.UnixNano(), f, []logstorage.Field{{"service_graph_stream", "-"}}) + err = vtinsert.PersistServiceGraph(ctx, r, rows, endTime) + if err != nil { + logger.Errorf("cannot presist service graph for time %d: %s", endTime.Unix(), err) } - lmp.MustClose() - return } diff --git a/app/vtinsert/opentelemetry/opentelemetry.go b/app/vtinsert/opentelemetry/opentelemetry.go index c6fc45233..b0554a799 100644 --- a/app/vtinsert/opentelemetry/opentelemetry.go +++ b/app/vtinsert/opentelemetry/opentelemetry.go @@ -1,6 +1,7 @@ package opentelemetry import ( + "context" "fmt" "net/http" "strconv" @@ -284,3 +285,21 @@ func appendKeyValuesWithPrefixSuffix(fields []logstorage.Field, kvs []*otelpb.Ke } return fields } + +func PersistServiceGraph(ctx context.Context, r *http.Request, fields [][]logstorage.Field, timestamp time.Time) error { + cp, err := insertutil.GetCommonParams(r) + if 
err != nil { + return err + } + lmp := cp.NewLogMessageProcessor("internalinsert_servicegraph", false) + + for _, row := range fields { + f := append(row, logstorage.Field{ + Name: "_msg", + Value: "-", + }) + lmp.AddRow(timestamp.UnixNano(), f, []logstorage.Field{{otelpb.TraceServiceGraphStreamName, "-"}}) + } + lmp.MustClose() + return nil +} diff --git a/app/vtselect/traces/query/query.go b/app/vtselect/traces/query/query.go index ac0543880..8cb9d747b 100644 --- a/app/vtselect/traces/query/query.go +++ b/app/vtselect/traces/query/query.go @@ -31,10 +31,6 @@ var ( "This limit affects Jaeger's /api/services API.") traceMaxSpanNameList = flag.Uint64("search.traceMaxSpanNameList", 1000, "The maximum number of span name can return in a get span name request. "+ "This limit affects Jaeger's /api/services/*/operations API.") - traceMaxDependencyList = flag.Uint64("search.traceMaxDependencyList", 1000, "The maximum number of dependency links can return in a get dependencies request. "+ - "This limit affects Jaeger's /api/dependencies API.") - traceMaxDependencyLookBehind = flag.Duration("search.traceMaxDependencyLookbehind", 1*time.Minute, "The maximum window for dependency analysis in real-time. 
"+ - "Increasing this duration will allow analysis across a longer time range, but it will increase the risk of performance degradation and higher resource usage.") ) var ( @@ -634,3 +630,91 @@ func GetDependencyList(ctx context.Context, cp *CommonParams, param *Dependencie return rows, nil } + +func GetServiceGraphTimeRange(ctx context.Context, r *http.Request, startTime, endTime time.Time) ([][]logstorage.Field, error) { + cp, err := GetCommonParams(r) + if err != nil { + return nil, err + } + qStrChildSpans := fmt.Sprintf( + `(NOT %s:"") AND (%s:~"%d|%d") | fields %s, %s | rename %s as %s, %s as child`, + otelpb.ParentSpanIDField, + otelpb.KindField, + otelpb.SpanKind(2), + otelpb.SpanKind(5), + otelpb.ParentSpanIDField, + otelpb.ResourceAttrServiceName, + otelpb.ParentSpanIDField, + otelpb.SpanIDField, + otelpb.ResourceAttrServiceName, + ) + qStrParentSpans := fmt.Sprintf( + `(NOT %s:"") AND (%s:~"%d|%d") | fields %s, %s | rename %s as parent`, + otelpb.SpanIDField, + otelpb.KindField, + otelpb.SpanKind(3), + otelpb.SpanKind(4), + otelpb.SpanIDField, + otelpb.ResourceAttrServiceName, + otelpb.ResourceAttrServiceName, + ) + qStr := fmt.Sprintf( + `%s | join by (%s) (%s) inner | NOT parent:eq_field(child) | stats by (parent, child) count() callCount`, + qStrChildSpans, + otelpb.SpanIDField, + qStrParentSpans, + ) + + q, err := logstorage.ParseQueryAtTimestamp(qStr, endTime.UnixNano()) + if err != nil { + return nil, fmt.Errorf("cannot parse query [%s]: %s", qStr, err) + } + q.AddTimeFilter(startTime.UnixNano(), endTime.UnixNano()) + q.AddPipeOffsetLimit(0, 1000) + + cp.Query = q + qctx := cp.NewQueryContext(ctx) + defer cp.UpdatePerQueryStatsMetrics() + + var rowsLock sync.Mutex + var rows [][]logstorage.Field + //var missingTimeColumn atomic.Bool + writeBlock := func(_ uint, db *logstorage.DataBlock) { + columns := db.Columns + if len(columns) == 0 { + return + } + clonedColumnNames := make([]string, len(columns)) + valuesCount := 0 + for i, c := range columns { 
+ clonedColumnNames[i] = strings.Clone(c.Name) + if len(c.Values) > valuesCount { + valuesCount = len(c.Values) + } + } + if valuesCount == 0 { + return + } + for i := 0; i < valuesCount; i++ { + fields := make([]logstorage.Field, 0, len(columns)) + for j := range columns { + fields = append( + fields, + logstorage.Field{ + Name: clonedColumnNames[j], + Value: strings.Clone(columns[j].Values[i]), + }, + ) + } + rowsLock.Lock() + rows = append(rows, fields) + rowsLock.Unlock() + } + } + + if err = vtstorage.RunQuery(qctx, writeBlock); err != nil { + return nil, fmt.Errorf("cannot execute query [%s]: %s", qStr, err) + } + + return rows, nil +} diff --git a/lib/protoparser/opentelemetry/pb/trace_fields.go b/lib/protoparser/opentelemetry/pb/trace_fields.go index 9106607e1..4a9b6a34d 100644 --- a/lib/protoparser/opentelemetry/pb/trace_fields.go +++ b/lib/protoparser/opentelemetry/pb/trace_fields.go @@ -7,6 +7,8 @@ const ( TraceIDIndexStreamName = "trace_id_idx_stream" TraceIDIndexFieldName = "trace_id_idx" TraceIDIndexPartitionCount = uint64(1024) + + TraceServiceGraphStreamName = "trace_service_graph_stream" ) // Resource From 60be6a37c1c381026933a5f9999629e3c0ee3b27 Mon Sep 17 00:00:00 2001 From: Jiekun Date: Sun, 28 Sep 2025 14:16:12 +0800 Subject: [PATCH 10/26] feature: [dependency] polish the background job start and stop for service graph --- .../backgroundtask/servicegraph.go | 107 +++++++++ app/victoria-traces/main.go | 5 +- app/vtbackground/servicegraph.go | 56 ----- app/vtselect/internalselect/internalselect.go | 2 +- app/vtselect/logsql/logsql.go | 2 +- app/vtselect/traces/query/query.go | 4 +- app/vtstorage/main.go | 8 + go.mod | 6 +- go.sum | 8 + .../VictoriaLogs/lib/logstorage/consts.go | 5 + .../VictoriaLogs/lib/logstorage/datadb.go | 26 +++ .../logstorage/filter_contains_common_case.go | 114 ++++++++++ .../logstorage/filter_equals_common_case.go | 56 +++++ .../VictoriaLogs/lib/logstorage/if_filter.go | 2 +- .../VictoriaLogs/lib/logstorage/indexdb.go | 48 
++++ .../VictoriaLogs/lib/logstorage/parser.go | 209 +++++++++++++++--- .../VictoriaLogs/lib/logstorage/pipe.go | 22 +- .../VictoriaLogs/lib/logstorage/pipe_copy.go | 8 +- .../lib/logstorage/pipe_extract.go | 7 - .../lib/logstorage/pipe_extract_regexp.go | 7 - .../lib/logstorage/pipe_facets.go | 12 +- .../lib/logstorage/pipe_filter.go | 2 +- .../lib/logstorage/pipe_format.go | 24 +- .../lib/logstorage/pipe_rename.go | 13 +- .../lib/logstorage/pipe_set_stream_fields.go | 196 ++++++++++++++++ .../VictoriaLogs/lib/logstorage/pipe_sort.go | 1 + .../lib/logstorage/pipe_stream_context.go | 10 +- .../lib/logstorage/pipe_unpack.go | 10 +- .../lib/logstorage/pipe_unpack_json.go | 2 +- .../lib/logstorage/pipe_unpack_logfmt.go | 2 +- .../lib/logstorage/pipe_unpack_syslog.go | 2 +- .../lib/logstorage/pipe_unroll.go | 2 +- .../lib/logstorage/pipe_update.go | 2 +- .../lib/logstorage/stats_count_uniq.go | 41 +++- .../lib/logstorage/stats_count_uniq_hash.go | 49 +++- .../VictoriaLogs/lib/logstorage/storage.go | 30 +++ .../lib/logstorage/storage_search.go | 135 +++++++++-- .../lib/logstorage/stream_tags.go | 4 + .../lib/logstorage/syslog_parser.go | 187 +++++++++++++++- .../lib/logstorage/values_encoder.go | 5 + .../lib/appmetrics/appmetrics.go | 5 +- .../VictoriaMetrics/lib/buildinfo/version.go | 8 + .../VictoriaMetrics/lib/fs/dir_remover.go | 8 +- .../lib/timerpool/timerpool.go | 8 +- .../concurrencylimiter.go | 3 +- .../VictoriaMetrics/metricsql/utils.go | 3 +- vendor/modules.txt | 8 +- 47 files changed, 1258 insertions(+), 216 deletions(-) create mode 100644 app/victoria-traces/backgroundtask/servicegraph.go delete mode 100644 app/vtbackground/servicegraph.go create mode 100644 vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/filter_contains_common_case.go create mode 100644 vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/filter_equals_common_case.go create mode 100644 
vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/pipe_set_stream_fields.go diff --git a/app/victoria-traces/backgroundtask/servicegraph.go b/app/victoria-traces/backgroundtask/servicegraph.go new file mode 100644 index 000000000..2da542e3d --- /dev/null +++ b/app/victoria-traces/backgroundtask/servicegraph.go @@ -0,0 +1,107 @@ +package backgroundtask + +import ( + "context" + "flag" + "net/http" + "strconv" + "time" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + + vtinsert "github.com/VictoriaMetrics/VictoriaTraces/app/vtinsert/opentelemetry" + vtselect "github.com/VictoriaMetrics/VictoriaTraces/app/vtselect/traces/query" + "github.com/VictoriaMetrics/VictoriaTraces/app/vtstorage" +) + +var ( + enableServiceGraph = flag.Bool("servicegraph.enable", false, "Whether to enable background task for generating service graph. It should only be enabled on VictoriaTraces single-node or vtstorage.") + serviceGraphInterval = flag.Duration("servicegraph.taskInterval", time.Minute, "The background task interval for generating service graph data. It requires setting `-servicegraph.enable=true`.") + serviceGraphTaskTimeout = flag.Duration("servicegraph.taskTimeout", 30*time.Second, "The background task timeout duration for generating service graph data. It requires setting `-servicegraph.enable=true`.") + serviceGraphLookbehind = flag.Duration("servicegraph.lookbehind", time.Minute, "The lookbehind window for each time service graph background task run. 
It requires setting `-servicegraph.enable=true`.") +) + +var ( + sgt *serviceGraphTask +) + +func Init() { + if *enableServiceGraph { + sgt = newServiceGraphTask() + sgt.Start() + } + return +} + +func Stop() { + if *enableServiceGraph { + sgt.Stop() + } + return +} + +type serviceGraphTask struct { + stopCh chan struct{} +} + +func newServiceGraphTask() *serviceGraphTask { + return &serviceGraphTask{ + stopCh: make(chan struct{}), + } +} + +func (sgt *serviceGraphTask) Start() { + logger.Infof("starting background task for service graph, interval: %v, lookbehind: %v", *serviceGraphInterval, *serviceGraphLookbehind) + ticker := time.NewTicker(*serviceGraphInterval) + go func() { + for { + select { + case <-sgt.stopCh: + return + case <-ticker.C: + ctx, cancelFunc := context.WithTimeout(context.Background(), *serviceGraphTaskTimeout) + GenerateServiceGraphTimeRange(ctx) + cancelFunc() + } + } + }() + return +} + +func (sgt *serviceGraphTask) Stop() { + close(sgt.stopCh) + return +} + +func GenerateServiceGraphTimeRange(ctx context.Context) { + endTime := time.Now().Truncate(*serviceGraphInterval) + startTime := endTime.Add(-*serviceGraphLookbehind) + + tenantIDs, err := vtstorage.GetTenantIDsByTimeRange(ctx, startTime.UnixNano(), endTime.UnixNano()) + if err != nil { + logger.Errorf("cannot get tenant ids: %s", err) + return + } + + // query and persist operations are executed sequentially, which helps not to consume excessive resources. 
+ for _, tenantID := range tenantIDs { + r, _ := http.NewRequestWithContext(ctx, "", "", nil) + r.Header.Set("AccountID", strconv.FormatUint(uint64(tenantID.AccountID), 10)) + r.Header.Set("ProjectID", strconv.FormatUint(uint64(tenantID.ProjectID), 10)) + rows, err := vtselect.GetServiceGraphTimeRange(ctx, r, startTime, endTime) + if err != nil { + logger.Errorf("cannot get service graph for time range [%d, %d]: %s", startTime.Unix(), endTime.Unix(), err) + return + } + if len(rows) == 0 { + return + } + + err = vtinsert.PersistServiceGraph(ctx, r, rows, endTime) + if err != nil { + logger.Errorf("cannot presist service graph for time %d: %s", endTime.Unix(), err) + } + } + + return +} diff --git a/app/victoria-traces/main.go b/app/victoria-traces/main.go index 4d1e61a77..54a890ed9 100644 --- a/app/victoria-traces/main.go +++ b/app/victoria-traces/main.go @@ -15,7 +15,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics" - "github.com/VictoriaMetrics/VictoriaTraces/app/vtbackground" + "github.com/VictoriaMetrics/VictoriaTraces/app/victoria-traces/backgroundtask" "github.com/VictoriaMetrics/VictoriaTraces/app/vtinsert" "github.com/VictoriaMetrics/VictoriaTraces/app/vtinsert/insertutil" "github.com/VictoriaMetrics/VictoriaTraces/app/vtselect" @@ -49,7 +49,7 @@ func main() { insertutil.SetLogRowsStorage(&vtstorage.Storage{}) vtinsert.Init() - vtbackground.InitServiceGraph() + backgroundtask.Init() go httpserver.Serve(listenAddrs, requestHandler, httpserver.ServeOptions{ UseProxyProtocol: useProxyProtocol, @@ -68,6 +68,7 @@ func main() { } logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds()) + backgroundtask.Stop() vtinsert.Stop() vtselect.Stop() vtstorage.Stop() diff --git a/app/vtbackground/servicegraph.go b/app/vtbackground/servicegraph.go deleted file mode 100644 index 73bcdd991..000000000 --- a/app/vtbackground/servicegraph.go +++ 
/dev/null @@ -1,56 +0,0 @@ -package vtbackground - -import ( - "context" - "flag" - "net/http" - "time" - - "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" - - vtinsert "github.com/VictoriaMetrics/VictoriaTraces/app/vtinsert/opentelemetry" - vtselect "github.com/VictoriaMetrics/VictoriaTraces/app/vtselect/traces/query" -) - -var ( - enableServiceGraph = flag.Bool("servicegraph.enable", false, "Whether to enable background task for generating service graph.") - serviceGraphInterval = flag.Duration("servicegraph.taskInterval", time.Minute, "The background task interval for generating service graph data. It requires setting `-servicegraph.enable=true`.") - serviceGraphLookbehind = flag.Duration("servicegraph.Lookbehind", time.Minute, "The lookbehind window for each time service graph background task run. It requires setting `-servicegraph.enable=true`.") -) - -func InitServiceGraph() { - if !*enableServiceGraph { - return - } - ticker := time.NewTicker(*serviceGraphInterval) - go func() { - for { - select { - case <-ticker.C: - GetServiceGraphLastMin(context.TODO()) - } - } - }() -} - -func GetServiceGraphLastMin(ctx context.Context) { - r := &http.Request{} - - endTime := time.Now().Truncate(*serviceGraphInterval) - startTime := endTime.Add(-*serviceGraphLookbehind) - - rows, err := vtselect.GetServiceGraphTimeRange(ctx, r, startTime, endTime) - if err != nil { - logger.Errorf("cannot get service graph for time range [%d, %d]: %s", startTime.Unix(), endTime.Unix(), err) - return - } - if len(rows) == 0 { - return - } - - err = vtinsert.PersistServiceGraph(ctx, r, rows, endTime) - if err != nil { - logger.Errorf("cannot presist service graph for time %d: %s", endTime.Unix(), err) - } - return -} diff --git a/app/vtselect/internalselect/internalselect.go b/app/vtselect/internalselect/internalselect.go index 9b8cb3a82..2953e688c 100644 --- a/app/vtselect/internalselect/internalselect.go +++ b/app/vtselect/internalselect/internalselect.go @@ -284,7 +284,7 @@ type 
commonParams struct { } func (cp *commonParams) NewQueryContext(ctx context.Context) *logstorage.QueryContext { - return logstorage.NewQueryContext(ctx, &cp.qs, cp.TenantIDs, cp.Query) + return logstorage.NewQueryContext(ctx, &cp.qs, cp.TenantIDs, cp.Query, false) } func (cp *commonParams) UpdatePerQueryStatsMetrics() { diff --git a/app/vtselect/logsql/logsql.go b/app/vtselect/logsql/logsql.go index 1c974cdb1..115d5fac6 100644 --- a/app/vtselect/logsql/logsql.go +++ b/app/vtselect/logsql/logsql.go @@ -1113,7 +1113,7 @@ type commonArgs struct { } func (ca *commonArgs) newQueryContext(ctx context.Context) *logstorage.QueryContext { - return logstorage.NewQueryContext(ctx, &ca.qs, ca.tenantIDs, ca.q) + return logstorage.NewQueryContext(ctx, &ca.qs, ca.tenantIDs, ca.q, false) } func (ca *commonArgs) updatePerQueryStatsMetrics() { diff --git a/app/vtselect/traces/query/query.go b/app/vtselect/traces/query/query.go index 8cb9d747b..be712e568 100644 --- a/app/vtselect/traces/query/query.go +++ b/app/vtselect/traces/query/query.go @@ -47,7 +47,7 @@ type CommonParams struct { } func (cp *CommonParams) NewQueryContext(ctx context.Context) *logstorage.QueryContext { - return logstorage.NewQueryContext(ctx, &cp.qs, cp.TenantIDs, cp.Query) + return logstorage.NewQueryContext(ctx, &cp.qs, cp.TenantIDs, cp.Query, false) } func (cp *CommonParams) UpdatePerQueryStatsMetrics() { @@ -574,7 +574,7 @@ type DependenciesQueryParameters struct { // GetDependencyList returns service dependencies graph edges (parent, child, callCount) in []*Row format. 
func GetDependencyList(ctx context.Context, cp *CommonParams, param *DependenciesQueryParameters) ([]*Row, error) { - qStr := `{service_graph_stream="-"} | fields parent, child, callCount | stats by (parent, child) sum(callCount) as callCount` + qStr := `{trace_service_graph_stream="-"} | fields parent, child, callCount | stats by (parent, child) sum(callCount) as callCount` startTime := param.EndTs.Add(-param.Lookback).UnixNano() endTime := param.EndTs.UnixNano() q, err := logstorage.ParseQueryAtTimestamp(qStr, endTime) diff --git a/app/vtstorage/main.go b/app/vtstorage/main.go index 386512f3f..1f51a476e 100644 --- a/app/vtstorage/main.go +++ b/app/vtstorage/main.go @@ -1,6 +1,7 @@ package vtstorage import ( + "context" "encoding/json" "flag" "fmt" @@ -488,6 +489,13 @@ func GetStreamIDs(qctx *logstorage.QueryContext, limit uint64) ([]logstorage.Val return netstorageSelect.GetStreamIDs(qctx, limit) } +func GetTenantIDsByTimeRange(ctx context.Context, startTime, endTime int64) ([]logstorage.TenantID, error) { + if localStorage == nil { + return nil, nil + } + return localStorage.GetTenantIDs(ctx, startTime, endTime) +} + func writeStorageMetrics(w io.Writer, strg *logstorage.Storage) { var ss logstorage.StorageStats strg.UpdateStats(&ss) diff --git a/go.mod b/go.mod index bf0c4b3b9..4b7653a36 100644 --- a/go.mod +++ b/go.mod @@ -3,8 +3,8 @@ module github.com/VictoriaMetrics/VictoriaTraces go 1.25.1 require ( - github.com/VictoriaMetrics/VictoriaLogs v1.33.1 - github.com/VictoriaMetrics/VictoriaMetrics v0.0.0-20250903201027-a0a33f0ce1c2 + github.com/VictoriaMetrics/VictoriaLogs v1.35.1-0.20250927125409-7c4c8381c2d8 + github.com/VictoriaMetrics/VictoriaMetrics v0.0.0-20250917082640-2c72ef0f3871 github.com/VictoriaMetrics/easyproto v0.1.4 github.com/VictoriaMetrics/fastcache v1.13.0 github.com/VictoriaMetrics/metrics v1.40.1 @@ -17,7 +17,7 @@ require ( ) require ( - github.com/VictoriaMetrics/metricsql v0.84.7 // indirect + github.com/VictoriaMetrics/metricsql v0.84.8 
// indirect github.com/golang/snappy v1.0.0 // indirect github.com/klauspost/compress v1.18.0 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect diff --git a/go.sum b/go.sum index e04195673..b6907c621 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,13 @@ github.com/VictoriaMetrics/VictoriaLogs v1.33.1 h1:Oi0Jb+AqCuhVf1E6VTNV2/lRnTcQMERmsVzF4BvjV/A= github.com/VictoriaMetrics/VictoriaLogs v1.33.1/go.mod h1:Mbyj/sNaXlbms/05TiWK0Yr6bU7WkQHusBhws2OcWbI= +github.com/VictoriaMetrics/VictoriaLogs v1.34.1-0.20250926053343-ba4afbbcde47 h1:JW2dgJ2voOk5Y6mCDNe8OAl3P8Y+24f2GBeh2MGGNeE= +github.com/VictoriaMetrics/VictoriaLogs v1.34.1-0.20250926053343-ba4afbbcde47/go.mod h1:c56BHhaonceOp5ddzKDPFNQmoG/oBE58/JXek+v714o= +github.com/VictoriaMetrics/VictoriaLogs v1.35.1-0.20250927125409-7c4c8381c2d8 h1:4Q5+H/7SN9r1Bvdr6H9zwjFp93mlLyaDPXdB1dy+iCs= +github.com/VictoriaMetrics/VictoriaLogs v1.35.1-0.20250927125409-7c4c8381c2d8/go.mod h1:c56BHhaonceOp5ddzKDPFNQmoG/oBE58/JXek+v714o= github.com/VictoriaMetrics/VictoriaMetrics v0.0.0-20250903201027-a0a33f0ce1c2 h1:XKK5/XxMw1HdlKic12Wh9rJRKxxpy0MTHBdWi+80MAs= github.com/VictoriaMetrics/VictoriaMetrics v0.0.0-20250903201027-a0a33f0ce1c2/go.mod h1:mRRUzTxEuernAU9h20/UeZs11dJA2J8LaP9DxbtCag0= +github.com/VictoriaMetrics/VictoriaMetrics v0.0.0-20250917082640-2c72ef0f3871 h1:5G3BS+OSqN6Lie30l+VoNbV8Ks0irHGzvrPmIpijG+A= +github.com/VictoriaMetrics/VictoriaMetrics v0.0.0-20250917082640-2c72ef0f3871/go.mod h1:Lrd9cpKD2edJ+uoXTGEcHMZwnI+vyomJrINld69Z8h4= github.com/VictoriaMetrics/easyproto v0.1.4 h1:r8cNvo8o6sR4QShBXQd1bKw/VVLSQma/V2KhTBPf+Sc= github.com/VictoriaMetrics/easyproto v0.1.4/go.mod h1:QlGlzaJnDfFd8Lk6Ci/fuLxfTo3/GThPs2KH23mv710= github.com/VictoriaMetrics/fastcache v1.13.0 h1:AW4mheMR5Vd9FkAPUv+NH6Nhw+fmbTMGMsNAoA/+4G0= @@ -10,6 +16,8 @@ github.com/VictoriaMetrics/metrics v1.40.1 h1:FrF5uJRpIVj9fayWcn8xgiI+FYsKGMslzP github.com/VictoriaMetrics/metrics v1.40.1/go.mod h1:XE4uudAAIRaJE614Tl5HMrtoEU6+GDZO4QTnNSsZRuA= 
github.com/VictoriaMetrics/metricsql v0.84.7 h1:zMONjtEULMbwEYU/qL4Hkc3GDfTTrv1bO+a9lmJf3do= github.com/VictoriaMetrics/metricsql v0.84.7/go.mod h1:d4EisFO6ONP/HIGDYTAtwrejJBBeKGQYiRl095bS4QQ= +github.com/VictoriaMetrics/metricsql v0.84.8 h1:5JXrvPJiYkYNqJVT7+hMZmpAwRHd3txBdlVIw4rJ1VM= +github.com/VictoriaMetrics/metricsql v0.84.8/go.mod h1:d4EisFO6ONP/HIGDYTAtwrejJBBeKGQYiRl095bS4QQ= github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156 h1:eMwmnE/GDgah4HI848JfFxHt+iPb26b4zyfspmqY0/8= github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= diff --git a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/consts.go b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/consts.go index 03aa3bb24..6024aa2e1 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/consts.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/consts.go @@ -1,5 +1,10 @@ package logstorage +// maxParallelReaders is the maximum parallel readers to use when executing a query. +// +// bigger number of parallel readers may help increasing query performance on high-latency storage such as S3 and NFS. +const maxParallelReaders = 2_000 + // partFormatLatestVersion is the latest format version for parts. // // See partHeader.FormatVersion for details. diff --git a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/datadb.go b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/datadb.go index 933e1caec..f52ebdce9 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/datadb.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/datadb.go @@ -943,6 +943,32 @@ func (ddb *datadb) updateStats(s *DatadbStats) { ddb.partsLock.Unlock() } +// getMinMaxTimestampsFast returns min and max timestamps across parts in ddb. 
+func (ddb *datadb) getMinMaxTimestamps() (int64, int64) { + minTs := int64(math.MaxInt64) + maxTs := int64(math.MinInt64) + + updateMinMaxTimestamps := func(pws []*partWrapper) { + for _, pw := range pws { + ph := &pw.p.ph + if ph.MinTimestamp < minTs { + minTs = ph.MinTimestamp + } + if ph.MaxTimestamp > maxTs { + maxTs = ph.MaxTimestamp + } + } + } + + ddb.partsLock.Lock() + updateMinMaxTimestamps(ddb.inmemoryParts) + updateMinMaxTimestamps(ddb.smallParts) + updateMinMaxTimestamps(ddb.bigParts) + ddb.partsLock.Unlock() + + return minTs, maxTs +} + // debugFlush() makes sure that the recently ingested data is available for search. func (ddb *datadb) debugFlush() { ddb.rb.flush() diff --git a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/filter_contains_common_case.go b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/filter_contains_common_case.go new file mode 100644 index 000000000..3eaa6b35c --- /dev/null +++ b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/filter_contains_common_case.go @@ -0,0 +1,114 @@ +package logstorage + +import ( + "fmt" + "sort" + "strings" + "unicode" + "unicode/utf8" + + "github.com/VictoriaMetrics/VictoriaLogs/lib/prefixfilter" +) + +// filterContainsCommonCase matches words and phrases where every captial letter +// can be replaced with a small letter, plus all capital words. 
+// +// Example LogsQL: `contains_common_case("Error")` is equivalent to contains_any("Error", "error", "ERROR") +type filterContainsCommonCase struct { + phrases []string + + containsAny *filterContainsAny +} + +func newFilterContainsCommonCase(fieldName string, phrases []string) (*filterContainsCommonCase, error) { + commonCasePhrases, err := getCommonCasePhrases(phrases) + if err != nil { + return nil, err + } + + fi := &filterContainsCommonCase{ + phrases: phrases, + containsAny: &filterContainsAny{ + fieldName: fieldName, + }, + } + fi.containsAny.values.values = commonCasePhrases + + return fi, nil +} + +func (fi *filterContainsCommonCase) String() string { + a := make([]string, len(fi.phrases)) + for i, phrase := range fi.phrases { + a[i] = quoteTokenIfNeeded(phrase) + } + phrases := strings.Join(a, ",") + return fmt.Sprintf("%scontains_common_case(%s)", quoteFieldNameIfNeeded(fi.containsAny.fieldName), phrases) +} + +func (fi *filterContainsCommonCase) updateNeededFields(pf *prefixfilter.Filter) { + fi.containsAny.updateNeededFields(pf) +} + +func (fi *filterContainsCommonCase) applyToBlockResult(br *blockResult, bm *bitmap) { + fi.containsAny.applyToBlockResult(br, bm) +} + +func (fi *filterContainsCommonCase) applyToBlockSearch(bs *blockSearch, bm *bitmap) { + fi.containsAny.applyToBlockSearch(bs, bm) +} + +func getCommonCasePhrases(phrases []string) ([]string, error) { + var dst []string + for _, phrase := range phrases { + upper := countUpperRunes(phrase) + if upper > 10 { + return nil, fmt.Errorf("too many common_case combinations for the %q; reduce the number of uppercase letters here", phrase) + } + dst = appendCommonCasePhrases(dst, "", phrase) + dst = append(dst, strings.ToUpper(phrase)) + } + + // Deduplicate dst + m := make(map[string]struct{}, len(dst)) + for _, s := range dst { + m[s] = struct{}{} + } + + dst = dst[:0] + for s := range m { + dst = append(dst, s) + } + sort.Strings(dst) + + return dst, nil +} + +func countUpperRunes(s string) 
int { + upper := 0 + for _, c := range s { + if unicode.IsUpper(c) { + upper++ + } + } + return upper +} + +func appendCommonCasePhrases(dst []string, prefix, phrase string) []string { + dst = append(dst, prefix+phrase) + + for off, c := range phrase { + if !unicode.IsUpper(c) { + continue + } + charLen := utf8.RuneLen(c) + if charLen == -1 { + continue + } + + phraseTail := phrase[off+charLen:] + dst = appendCommonCasePhrases(dst, prefix+phrase[:off]+string(unicode.ToLower(c)), phraseTail) + dst = appendCommonCasePhrases(dst, prefix+phrase[:off+charLen], phraseTail) + } + return dst +} diff --git a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/filter_equals_common_case.go b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/filter_equals_common_case.go new file mode 100644 index 000000000..3f676dcb5 --- /dev/null +++ b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/filter_equals_common_case.go @@ -0,0 +1,56 @@ +package logstorage + +import ( + "fmt" + "strings" + + "github.com/VictoriaMetrics/VictoriaLogs/lib/prefixfilter" +) + +// filterEqualsCommonCase matches words and phrases where every captial letter +// can be replaced with a small letter, plus all capital words. 
+// +// Example LogsQL: `equals_common_case("Error")` is equivalent to in("Error", "error", "ERROR") +type filterEqualsCommonCase struct { + phrases []string + + equalsAny *filterIn +} + +func newFilterEqualsCommonCase(fieldName string, phrases []string) (*filterEqualsCommonCase, error) { + commonCasePhrases, err := getCommonCasePhrases(phrases) + if err != nil { + return nil, err + } + + fi := &filterEqualsCommonCase{ + phrases: phrases, + equalsAny: &filterIn{ + fieldName: fieldName, + }, + } + fi.equalsAny.values.values = commonCasePhrases + + return fi, nil +} + +func (fi *filterEqualsCommonCase) String() string { + a := make([]string, len(fi.phrases)) + for i, phrase := range fi.phrases { + a[i] = quoteTokenIfNeeded(phrase) + } + phrases := strings.Join(a, ",") + return fmt.Sprintf("%sequals_common_case(%s)", quoteFieldNameIfNeeded(fi.equalsAny.fieldName), phrases) +} + +func (fi *filterEqualsCommonCase) updateNeededFields(pf *prefixfilter.Filter) { + fi.equalsAny.updateNeededFields(pf) +} + +func (fi *filterEqualsCommonCase) applyToBlockResult(br *blockResult, bm *bitmap) { + fi.equalsAny.applyToBlockResult(br, bm) +} + +func (fi *filterEqualsCommonCase) applyToBlockSearch(bs *blockSearch, bm *bitmap) { + fi.equalsAny.applyToBlockSearch(bs, bm) +} diff --git a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/if_filter.go b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/if_filter.go index 7a8024d43..d2158cee5 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/if_filter.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/if_filter.go @@ -33,7 +33,7 @@ func parseIfFilter(lex *lexer) (*ifFilter, error) { return iff, nil } - f, err := parseFilter(lex) + f, err := parseFilter(lex, true) if err != nil { return nil, fmt.Errorf("cannot parse 'if' filter: %w", err) } diff --git a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/indexdb.go 
b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/indexdb.go index 01527d41a..d70211438 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/indexdb.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/indexdb.go @@ -470,6 +470,47 @@ func (is *indexSearch) getStreamIDsForTagRegexp(tenantID TenantID, tagName strin return ids } +func (is *indexSearch) getTenantIDs() []TenantID { + var tenantIDs []TenantID // return as result + var tenantID TenantID // variable for unmarshal + + ts := &is.ts + kb := &is.kb + + kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixStreamID, tenantID) + ts.Seek(kb.B) + + for ts.NextItem() { + _, prefix, err := unmarshalCommonPrefix(&tenantID, ts.Item) + if err != nil { + logger.Panicf("FATAL: cannot unmarshal tenantID: %s", err) + } + if prefix != nsPrefixStreamID { + // Reached the end of entries with the needed prefix. + break + } + tenantIDs = append(tenantIDs, tenantID) + // Seek for the next (accountID, projectID) + tenantID.ProjectID++ + if tenantID.ProjectID == 0 { + tenantID.AccountID++ + if tenantID.AccountID == 0 { + // Reached the end (accountID, projectID) space + break + } + } + + kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixStreamID, tenantID) + ts.Seek(kb.B) + } + + if err := ts.Error(); err != nil { + logger.Panicf("FATAL: error when searching for tenant ids: %s", err) + } + + return tenantIDs +} + func (idb *indexdb) mustRegisterStream(streamID *streamID, streamTagsCanonical string) { st := GetStreamTags() mustUnmarshalStreamTags(st, streamTagsCanonical) @@ -575,6 +616,13 @@ func (idb *indexdb) storeStreamIDsToCache(tenantIDs []TenantID, sf *StreamFilter bbPool.Put(bb) } +func (idb *indexdb) searchTenants() []TenantID { + is := idb.getIndexSearch() + defer idb.putIndexSearch(is) + + return is.getTenantIDs() +} + type batchItems struct { buf []byte diff --git a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/parser.go 
b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/parser.go index 261dcf705..21021c460 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/parser.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/parser.go @@ -122,6 +122,10 @@ func (lex *lexer) nextCompoundTokenExt(stopTokens []string) (string, error) { return s, nil } + if !lex.isSkippedSpace && lex.isKeywordAny(deniedFirstCompoundTokens) && isWord(lex.prevRawToken) { + return "", fmt.Errorf("missing whitespace between %q and %q", lex.prevRawToken, lex.token) + } + if !lex.isAllowedCompoundToken(stopTokens) { return "", fmt.Errorf("compound token cannot start with %q; put it into quotes if needed", lex.token) } @@ -165,7 +169,14 @@ func (lex *lexer) isAllowedCompoundToken(stopTokens []string) bool { } // Regular word token is allowed to be a part of compound token. - for _, r := range lex.token { + return isWord(lex.token) +} + +func isWord(s string) bool { + if s == "" { + return false + } + for _, r := range s { if !isTokenRune(r) { return false } @@ -173,6 +184,13 @@ func (lex *lexer) isAllowedCompoundToken(stopTokens []string) bool { return true } +// deniedFirstCompoundTokens contains disallowed starting tokens for compound tokens without the whitespace in front of these tokens. +var deniedFirstCompoundTokens = []string{ + "/", + ".", + "$", +} + // glueCompoundTokens contains tokens allowed inside unquoted compound tokens. var glueCompoundTokens = []string{ "+", // Seen in time formats: 2025-07-20T10:20:30+03:00 @@ -190,7 +208,7 @@ var mathStopCompoundTokens = []string{ "/", } -func (lex *lexer) isPrevRawToken(tokens ...string) bool { +func (lex *lexer) isPrevRawToken(tokens []string) bool { prevTokenLower := strings.ToLower(lex.prevRawToken) for _, token := range tokens { if token == prevTokenLower { @@ -205,7 +223,7 @@ func (lex *lexer) checkPrevAdjacentToken(tokens ...string) error { return nil } - if !lex.isPrevRawToken(tokens...) 
{ + if !lex.isPrevRawToken(tokens) { return fmt.Errorf("missing whitespace or ':' between %q and %q; probably, the whole string must be put into quotes", lex.prevRawToken, lex.token) } @@ -360,14 +378,22 @@ type queryOptions struct { // needPrint is set to true if the queryOptions must be printed in the queryOptions.String(). needPrint bool - // concurrency is the number of concurrent workers to use for query execution on every. + // concurrency is the number of concurrent CPU-bound workers to use for a single query execution. // // By default the number of concurrent workers equals to the number of available CPU cores. concurrency uint + // parallelReaders is the number of concurrent IO-bound data readers to use for a single query execution. + // + // By default the number of parallel readers equals to concurrency. + parallelReaders uint + // if ignoreGlobalTimeFilter is set, then Query.AddTimeFilter doesn't add the time filter to the query and to all its subqueries. ignoreGlobalTimeFilter *bool + // allowPartialResponse allows returning partial responses in VictoriaLogs cluster setup when some of vlstorage nodes are temporarily unavailable. + allowPartialResponse *bool + // timeOffset is the number of nanoseconds to subtracts from all time filters in the query. // // The timeOffset is also added to the selected _time field values before being passed to query pipes. @@ -388,6 +414,9 @@ func (opts *queryOptions) String() string { if opts.ignoreGlobalTimeFilter != nil { a = append(a, fmt.Sprintf("ignore_global_time_filter=%v", *opts.ignoreGlobalTimeFilter)) } + if opts.allowPartialResponse != nil { + a = append(a, fmt.Sprintf("allow_partial_response=%v", *opts.allowPartialResponse)) + } if opts.timeOffsetStr != "" { a = append(a, fmt.Sprintf("time_offset=%s", opts.timeOffsetStr)) } @@ -413,6 +442,24 @@ func (q *Query) String() string { return s } +// GetParallelReaders returns the number of parallel readers to use for executing the given query. 
+func (q *Query) GetParallelReaders(defaultParallelReaders int) int { + n := int(q.opts.parallelReaders) + if n <= 0 { + n = int(q.opts.concurrency) + } + if n <= 0 { + n = defaultParallelReaders + } + if n <= 0 { + n = 2 * cgroup.AvailableCPUs() + } + if n > maxParallelReaders { + n = maxParallelReaders + } + return n +} + // GetConcurrency returns concurrency for the q. // // See https://docs.victoriametrics.com/victorialogs/logsql/#query-options @@ -599,6 +646,12 @@ func (q *Query) CloneWithTimeFilter(timestamp, start, end int64) *Query { // // The returned query is nil if q cannot be used for optimized querying of the last N results. func (q *Query) GetLastNResultsQuery() (qOpt *Query, offset uint64, limit uint64) { + start, end := q.GetFilterTimeRange() + if !CanApplyLastNResultsOptimization(start, end) { + // It is faster to execute the query as is on such a small time range. + return nil, 0, 0 + } + pipes := q.pipes // Remember the trailing 'fields' and 'delete' pipes - they are moved in front of `sort` pipe below. @@ -644,6 +697,11 @@ func (q *Query) GetLastNResultsQuery() (qOpt *Query, offset uint64, limit uint64 return qCopy, offset, limit } +// CanApplyLastNResultsOptimization returns true if there is sense for applying 'last N' optimization for the query on the time range [start, end] +func CanApplyLastNResultsOptimization(start, end int64) bool { + return end/2-start/2 > nsecsPerSecond +} + func getOffsetLimitFromPipe(p pipe) (uint64, uint64, bool) { switch t := p.(type) { case *pipeSort: @@ -734,14 +792,22 @@ func (q *Query) addTimeFilterNoSubqueries(start, end int64) { timeOffset := q.opts.timeOffset + // use nanosecond precision for [start, end] time range in order to avoid + // automatic adjustement of timestamps for its' string representation. 
+ // See https://github.com/VictoriaMetrics/VictoriaLogs/issues/587 + // + // Do not use numeric representation of timestamps, since they are improperly parsed + // for negative timestamps (they are parsed as relative to the current time) + // and for timestamps with less than 15 decimal digits (they are parsed as microsends, + // milliseconds or seconds depending on the number of decimal digit). + startStr := marshalTimestampRFC3339NanoPreciseString(nil, start) + endStr := marshalTimestampRFC3339NanoPreciseString(nil, end) + ft := &filterTime{ minTimestamp: subNoOverflowInt64(start, timeOffset), maxTimestamp: subNoOverflowInt64(end, timeOffset), - // use nanosecond representation in the query here in order to avoid - // automatic adjustement of the end timestamp for its' string representation. - // See https://github.com/VictoriaMetrics/VictoriaLogs/issues/587 - stringRepr: fmt.Sprintf("[%d,%d]", start, end), + stringRepr: fmt.Sprintf("[%s,%s]", startStr, endStr), } fa, ok := q.f.(*filterAnd) @@ -976,15 +1042,19 @@ func (q *Query) GetStatsByFieldsAddGroupingByTime(step int64) ([]string, error) } ps := pipes[idx].(*pipeStats) - // verify that pipes in front of the last `pipe` do not modify or delete `_time` field - for i := 0; i < idx; i++ { - p := pipes[i] - if _, ok := p.(*pipeStats); ok { - // Skip `stats` pipe, since it is updated with the grouping by `_time` in the addByTimeFieldToStatsPipes() below. - continue - } - if !p.canReturnLastNResults() { - return nil, fmt.Errorf("the pipe `| %q` cannot be put in front of `| %q`, since it modifies or deletes `_time` field", p, ps) + // For range stats (step > 0), verify that pipes in front of the last `stats` pipe + // do not modify or delete the `_time` field, since it is required for bucketing by step. + // For instant stats (step == 0), allow such pipes for broader query flexibility. 
+ if step > 0 { + for i := 0; i < idx; i++ { + p := pipes[i] + if _, ok := p.(*pipeStats); ok { + // Skip `stats` pipe, since it is updated with the grouping by `_time` in the addByTimeFieldToStatsPipes() below. + continue + } + if !p.canReturnLastNResults() { + return nil, fmt.Errorf("the pipe `| %q` cannot be put in front of `| %q`, since it modifies or deletes `_time` field", p, ps) + } } } @@ -1673,7 +1743,7 @@ func parseQuery(lex *lexer) (*Query, error) { lex.pushQueryOptions(&q.opts) defer lex.popQueryOptions() - f, err := parseFilter(lex) + f, err := parseFilter(lex, true) if err != nil { return nil, fmt.Errorf("%w; context: [%s]", err, lex.context()) } @@ -1759,12 +1829,15 @@ func parseQueryOptions(dstOpts *queryOptions, lex *lexer) error { if !ok { return fmt.Errorf("cannot parse 'concurrency=%q' option as unsigned integer", v) } - if n > 1024 { - // There is zero sense in running too many workers. - n = 1024 - } dstOpts.concurrency = uint(n) dstOpts.needPrint = true + case "parallel_readers": + n, ok := tryParseUint64(v) + if !ok { + return fmt.Errorf("cannot parse 'parallel_readers=%q' option as unsigned integer", v) + } + dstOpts.parallelReaders = uint(n) + dstOpts.needPrint = true case "ignore_global_time_filter": ignoreGlobalTimeFilter, err := strconv.ParseBool(v) if err != nil { @@ -1772,6 +1845,13 @@ func parseQueryOptions(dstOpts *queryOptions, lex *lexer) error { } dstOpts.ignoreGlobalTimeFilter = &ignoreGlobalTimeFilter dstOpts.needPrint = true + case "allow_partial_response": + allowPartialResponse, err := strconv.ParseBool(v) + if err != nil { + return fmt.Errorf("cannot parse 'allow_partial_response=%q' option as boolean: %w", v, err) + } + dstOpts.allowPartialResponse = &allowPartialResponse + dstOpts.needPrint = true case "time_offset": timeOffset, ok := tryParseDuration(v) if !ok { @@ -1814,16 +1894,18 @@ func parseKeyValuePair(lex *lexer) (string, string, error) { return k, v, nil } -func parseFilter(lex *lexer) (filter, error) { +func 
parseFilter(lex *lexer, allowPipeKeywords bool) (filter, error) { if lex.isKeyword("|", ")", "") { return nil, fmt.Errorf("missing query") } - // Verify the first token in the filter doesn't match pipe names. - firstToken := strings.ToLower(lex.rawToken) - if firstToken == "by" || isPipeName(firstToken) || isStatsFuncName(firstToken) { - return nil, fmt.Errorf("query filter cannot start with pipe keyword %q; see https://docs.victoriametrics.com/victorialogs/logsql/#query-syntax; "+ - "please put the first word of the filter into quotes", firstToken) + if !allowPipeKeywords { + // Verify the first token in the filter doesn't match pipe names. + firstToken := strings.ToLower(lex.rawToken) + if firstToken == "by" || isPipeName(firstToken) || isStatsFuncName(firstToken) { + return nil, fmt.Errorf("query filter cannot start with pipe keyword %q; see https://docs.victoriametrics.com/victorialogs/logsql/#query-syntax; "+ + "please put the first word of the filter into quotes", firstToken) + } } fo, err := parseFilterOr(lex, "") @@ -1917,8 +1999,12 @@ func parseFilterGeneric(lex *lexer, fieldName string) (filter, error) { return parseFilterContainsAll(lex, fieldName) case lex.isKeyword("contains_any"): return parseFilterContainsAny(lex, fieldName) + case lex.isKeyword("contains_common_case"): + return parseFilterContainsCommonCase(lex, fieldName) case lex.isKeyword("eq_field"): return parseFilterEqField(lex, fieldName) + case lex.isKeyword("equals_common_case"): + return parseFilterEqualsCommonCase(lex, fieldName) case lex.isKeyword("exact"): return parseFilterExact(lex, fieldName) case lex.isKeyword("i"): @@ -2224,6 +2310,36 @@ func parseFilterIn(lex *lexer, fieldName string) (filter, error) { return parseInValues(lex, fieldName, fi, &fi.values) } +func parseFilterContainsCommonCase(lex *lexer, fieldName string) (filter, error) { + lex.nextToken() + + phrases, err := parseArgsInParens(lex) + if err != nil { + return nil, fmt.Errorf("cannot parse 
'contains_common_case(...)' args: %w", err) + } + + fi, err := newFilterContainsCommonCase(fieldName, phrases) + if err != nil { + return nil, fmt.Errorf("cannot parse 'contains_common_case(...)': %w", err) + } + return fi, nil +} + +func parseFilterEqualsCommonCase(lex *lexer, fieldName string) (filter, error) { + lex.nextToken() + + phrases, err := parseArgsInParens(lex) + if err != nil { + return nil, fmt.Errorf("cannot parse 'equals_common_case(...)' args: %w", err) + } + + fi, err := newFilterEqualsCommonCase(fieldName, phrases) + if err != nil { + return nil, fmt.Errorf("cannot parse 'equals_common_case(...)': %w", err) + } + return fi, nil +} + func parseInValues(lex *lexer, fieldName string, f filter, iv *inValues) (filter, error) { // Try parsing in(arg1, ..., argN) at first lexState := lex.backupState() @@ -2388,12 +2504,18 @@ func parseFilterStar(lex *lexer, fieldName string) (filter, error) { } func parseFilterTilda(lex *lexer, fieldName string) (filter, error) { + op := lex.token lex.nextToken() if lex.isKeyword("-") { return nil, fmt.Errorf("regexp, which start with %q, must be put in quotes", lex.token) } + if lex.isSkippedSpace && fieldName == "" { + // Deny invalid filters `foo ~ bar` and `foo !~ bar`. They must be written as `foo:~bar` and `foo:!~bar` + return nil, fmt.Errorf("missing ':' in front of %q; see https://docs.victoriametrics.com/victorialogs/logsql/#filters", op) + } + arg, err := lex.nextCompoundToken() if err != nil { return nil, fmt.Errorf("cannot read regexp for field %q: %w", getCanonicalColumnName(fieldName), err) @@ -2416,6 +2538,10 @@ func parseFilterNotTilda(lex *lexer, fieldName string) (filter, error) { func parseFilterEQ(lex *lexer, fieldName string) (filter, error) { op := lex.token lex.nextToken() + if lex.isSkippedSpace && fieldName == "" { + // Deny invalid filters 'foo = bar`. 
It must be written as `foo:=bar` + return nil, fmt.Errorf("missing ':' in front of %q; see https://docs.victoriametrics.com/victorialogs/logsql/#filters", op) + } phrase, err := lex.nextCompoundToken() if err != nil { @@ -2453,12 +2579,17 @@ func parseFilterGT(lex *lexer, fieldName string) (filter, error) { includeMinValue := false op := ">" - if lex.isKeyword("=") { + if !lex.isSkippedSpace && lex.isKeyword("=") { lex.nextToken() includeMinValue = true op = ">=" } + if lex.isSkippedSpace && fieldName == "" { + // Deny invalid filters 'foo > bar' and 'foo >= bar'. They must be written as 'foo:>bar` and `foo:>=bar` + return nil, fmt.Errorf("missing ':' in front of %q; see https://docs.victoriametrics.com/victorialogs/logsql/#filters", op) + } + lexState := lex.backupState() minValue, fStr, err := parseNumber(lex) if err != nil { @@ -2488,12 +2619,17 @@ func parseFilterLT(lex *lexer, fieldName string) (filter, error) { includeMaxValue := false op := "<" - if lex.isKeyword("=") { + if !lex.isSkippedSpace && lex.isKeyword("=") { lex.nextToken() includeMaxValue = true op = "<=" } + if lex.isSkippedSpace && fieldName == "" { + // Deny invalid filters 'foo < bar' and 'foo <= bar'. They must be written as 'foo: 0 { return 0, fmt.Errorf("unexpected tail left after importing shards' state; len(tail)=%d", len(src)) } - sup.shards = shards + + stateSizeIncrease = sup.importShards(shards, stateSizeIncrease) return stateSizeIncrease, nil } +func (sup *statsCountUniqProcessor) importShards(shards []statsCountUniqSet, stateSizeIncrease int) int { + if uint(len(shards)) == sup.concurrency { + // Fast path - nothing to reshard + sup.shards = shards + return stateSizeIncrease + } + + // Slow path - reshard shards in order to align len(shards) with sup.concurrency. + // This case is possible when the remote side has different concurrency than the sup.concurrency. 
+ // See https://github.com/VictoriaMetrics/VictoriaLogs/issues/682 + stateSizeIncrease = 0 + for i := range shards { + stateSizeIncrease += sup.importShard(&shards[i]) + } + return stateSizeIncrease +} + +func (sup *statsCountUniqProcessor) importShard(shard *statsCountUniqSet) int { + stateSizeIncrease := 0 + for ts := range shard.timestamps { + stateSizeIncrease += sup.updateStateTimestamp(int64(ts)) + } + for n := range shard.u64 { + stateSizeIncrease += sup.updateStateUint64(n) + } + for n := range shard.negative64 { + stateSizeIncrease += sup.updateStateNegativeInt64(int64(n)) + } + for s := range shard.strings { + stateSizeIncrease += sup.updateStateString(bytesutil.ToUnsafeBytes(s)) + } + return stateSizeIncrease +} + func (sup *statsCountUniqProcessor) finalizeStats(sf statsFunc, dst []byte, stopCh <-chan struct{}) []byte { sup.mergeShardssParallel(stopCh) diff --git a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/stats_count_uniq_hash.go b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/stats_count_uniq_hash.go index 6c739de69..a985e1a56 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/stats_count_uniq_hash.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/stats_count_uniq_hash.go @@ -466,10 +466,6 @@ func (sup *statsCountUniqHashProcessor) importState(src []byte, stopCh <-chan st return stateSize, nil } - if shardsLen != uint64(sup.concurrency) { - return 0, fmt.Errorf("unexpected number of imported shards: %d; want %d", shardsLen, sup.concurrency) - } - shards := make([]statsCountUniqHashSet, shardsLen) stateSizeIncrease := int(unsafe.Sizeof(shards[0])) * len(shards) for i := range shards { @@ -484,11 +480,46 @@ func (sup *statsCountUniqHashProcessor) importState(src []byte, stopCh <-chan st if len(src) > 0 { return 0, fmt.Errorf("unexpected tail left after importing shards' state; len(tail)=%d", len(src)) } - sup.shards = shards + + stateSizeIncrease = sup.importShards(shards, 
stateSizeIncrease) return stateSizeIncrease, nil } +func (sup *statsCountUniqHashProcessor) importShards(shards []statsCountUniqHashSet, stateSizeIncrease int) int { + if uint(len(shards)) == sup.concurrency { + // Fast path - nothing to reshard + sup.shards = shards + return stateSizeIncrease + } + + // Slow path - reshard shards in order to align len(shards) with sup.concurrency. + // This case is possible when the remote side has different concurrency than the sup.concurrency. + // See https://github.com/VictoriaMetrics/VictoriaLogs/issues/682 + stateSizeIncrease = 0 + for i := range shards { + stateSizeIncrease += sup.importShard(&shards[i]) + } + return stateSizeIncrease +} + +func (sup *statsCountUniqHashProcessor) importShard(shard *statsCountUniqHashSet) int { + stateSizeIncrease := 0 + for ts := range shard.timestamps { + stateSizeIncrease += sup.updateStateTimestamp(int64(ts)) + } + for n := range shard.u64 { + stateSizeIncrease += sup.updateStateUint64(n) + } + for n := range shard.negative64 { + stateSizeIncrease += sup.updateStateNegativeInt64(int64(n)) + } + for h := range shard.strings { + stateSizeIncrease += sup.updateStateStringHash(h) + } + return stateSizeIncrease +} + func (sup *statsCountUniqHashProcessor) finalizeStats(sf statsFunc, dst []byte, stopCh <-chan struct{}) []byte { sup.mergeShardssParallel(stopCh) @@ -567,6 +598,10 @@ func (sup *statsCountUniqHashProcessor) updateStateInt64(n int64) int { func (sup *statsCountUniqHashProcessor) updateStateString(v []byte) int { h := xxhash.Sum64(v) + return sup.updateStateStringHash(h) +} + +func (sup *statsCountUniqHashProcessor) updateStateStringHash(h uint64) int { if sup.shards == nil { stateSizeIncrease := sup.uniqValues.updateStateStringHash(h) if stateSizeIncrease > 0 { @@ -574,10 +609,6 @@ func (sup *statsCountUniqHashProcessor) updateStateString(v []byte) int { } return stateSizeIncrease } - return sup.updateStateStringHash(h) -} - -func (sup *statsCountUniqHashProcessor) 
updateStateStringHash(h uint64) int { sus := sup.getShardByStringHash(h) return sus.updateStateStringHash(h) } diff --git a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/storage.go b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/storage.go index bfd956192..f11f44684 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/storage.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/storage.go @@ -2,6 +2,7 @@ package logstorage import ( "fmt" + "math" "os" "path/filepath" "slices" @@ -37,6 +38,14 @@ type StorageStats struct { // PartitionStats contains partition stats. PartitionStats + + // MinTimestamp is the minimum event timestamp across the entire storage (in nanoseconds). + // It is set to math.MinInt64 if there is no data. + MinTimestamp int64 + + // MaxTimestamp is the maximum event timestamp across the entire storage (in nanoseconds). + // It is set to math.MaxInt64 if there is no data. + MaxTimestamp int64 } // Reset resets s. @@ -51,6 +60,11 @@ type StorageConfig struct { // Older data is automatically deleted. Retention time.Duration + // DefaultParallelReaders is the default number of parallel readers to use per each query execution. + // + // Higher value can help improving query performance on storage with high disk read latency such as S3. + DefaultParallelReaders int + // MaxDiskSpaceUsageBytes is an optional maximum disk space logs can use. // // The oldest per-day partitions are automatically dropped if the total disk space usage exceeds this limit. @@ -97,6 +111,11 @@ type Storage struct { // older data is automatically deleted retention time.Duration + // defaultParallelReaders is the default number of parallel IO-bound readers to use for query execution. + // + // Higher number of readers may help increasing query performance on storage with high read latency such as S3. + defaultParallelReaders int + // maxDiskSpaceUsageBytes is an optional maximum disk space logs can use. 
// // The oldest per-day partitions are automatically dropped if the total disk space usage exceeds this limit. @@ -444,6 +463,7 @@ func MustOpenStorage(path string, cfg *StorageConfig) *Storage { s := &Storage{ path: path, retention: retention, + defaultParallelReaders: cfg.DefaultParallelReaders, maxDiskSpaceUsageBytes: cfg.MaxDiskSpaceUsageBytes, maxDiskUsagePercent: cfg.MaxDiskUsagePercent, flushInterval: flushInterval, @@ -918,12 +938,22 @@ func (s *Storage) UpdateStats(ss *StorageStats) { } else { ss.MaxDiskSpaceUsageBytes = int64(fs.MustGetTotalSpace(s.path) * uint64(s.maxDiskUsagePercent) / 100) } + // Use sentinel values to indicate unbounded / no data for consistency + ss.MinTimestamp, ss.MaxTimestamp = math.MinInt64, math.MaxInt64 s.partitionsLock.Lock() ss.PartitionsCount += uint64(len(s.partitions)) for _, ptw := range s.partitions { ptw.pt.updateStats(&ss.PartitionStats) } + + if len(s.partitions) > 0 { + p0 := s.partitions[0] + pLast := s.partitions[len(s.partitions)-1] + + ss.MinTimestamp, _ = p0.pt.ddb.getMinMaxTimestamps() + _, ss.MaxTimestamp = pLast.pt.ddb.getMinMaxTimestamps() + } s.partitionsLock.Unlock() ss.IsReadOnly = s.IsReadOnly() diff --git a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/storage_search.go b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/storage_search.go index 75f4c3954..24027d679 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/storage_search.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/storage_search.go @@ -34,6 +34,9 @@ type QueryContext struct { // Query is the query to execute. Query *Query + // AllowPartialResponse indicates whether to allow partial response. This flag is used only in cluster setup when vlselect queries vlstorage nodes. + AllowPartialResponse bool + // startTime is creation time for the QueryContext. // // It is used for calculating query druation. 
@@ -41,24 +44,24 @@ type QueryContext struct { } // NewQueryContext returns new context for the given query. -func NewQueryContext(ctx context.Context, qs *QueryStats, tenantIDs []TenantID, q *Query) *QueryContext { +func NewQueryContext(ctx context.Context, qs *QueryStats, tenantIDs []TenantID, q *Query, allowPartialResponse bool) *QueryContext { startTime := time.Now() - return newQueryContext(ctx, qs, tenantIDs, q, startTime) + return newQueryContext(ctx, qs, tenantIDs, q, allowPartialResponse, startTime) } // WithQuery returns new QueryContext with the given q, while preserving other fields from qctx. func (qctx *QueryContext) WithQuery(q *Query) *QueryContext { - return newQueryContext(qctx.Context, qctx.QueryStats, qctx.TenantIDs, q, qctx.startTime) + return newQueryContext(qctx.Context, qctx.QueryStats, qctx.TenantIDs, q, qctx.AllowPartialResponse, qctx.startTime) } // WithContext returns new QueryContext with the given ctx, while preserving other fields from qctx. func (qctx *QueryContext) WithContext(ctx context.Context) *QueryContext { - return newQueryContext(ctx, qctx.QueryStats, qctx.TenantIDs, qctx.Query, qctx.startTime) + return newQueryContext(ctx, qctx.QueryStats, qctx.TenantIDs, qctx.Query, qctx.AllowPartialResponse, qctx.startTime) } // WithContextAndQuery returns new QueryContext with the given ctx and q, while preserving other fields from qctx. func (qctx *QueryContext) WithContextAndQuery(ctx context.Context, q *Query) *QueryContext { - return newQueryContext(ctx, qctx.QueryStats, qctx.TenantIDs, q, qctx.startTime) + return newQueryContext(ctx, qctx.QueryStats, qctx.TenantIDs, q, qctx.AllowPartialResponse, qctx.startTime) } // QueryDurationNsecs returns the duration in nanoseconds since the NewQueryContext call. 
@@ -66,13 +69,21 @@ func (qctx *QueryContext) QueryDurationNsecs() int64 { return time.Since(qctx.startTime).Nanoseconds() } -func newQueryContext(ctx context.Context, qs *QueryStats, tenantIDs []TenantID, q *Query, startTime time.Time) *QueryContext { +func newQueryContext(ctx context.Context, qs *QueryStats, tenantIDs []TenantID, q *Query, allowPartialResponse bool, startTime time.Time) *QueryContext { + if q.opts.allowPartialResponse != nil { + // query options override other settings for allowPartialResponse. + allowPartialResponse = *q.opts.allowPartialResponse + } + return &QueryContext{ Context: ctx, QueryStats: qs, TenantIDs: tenantIDs, Query: q, - startTime: startTime, + + AllowPartialResponse: allowPartialResponse, + + startTime: startTime, } } @@ -192,21 +203,22 @@ func (s *Storage) runQuery(qctx *QueryContext, writeBlock writeBlockResultFunc) timeOffset: -q.opts.timeOffset, } - workersCount := q.GetConcurrency() - search := func(stopCh <-chan struct{}, writeBlockToPipes writeBlockResultFunc) error { - s.search(workersCount, so, qctx.QueryStats, stopCh, writeBlockToPipes) + workersCount := q.GetParallelReaders(s.defaultParallelReaders) + s.searchParallel(workersCount, so, qctx.QueryStats, stopCh, writeBlockToPipes) return nil } - return runPipes(qctx, q.pipes, search, writeBlock, workersCount) + concurrency := q.GetConcurrency() + return runPipes(qctx, q.pipes, search, writeBlock, concurrency) } // searchFunc must perform search and pass its results to writeBlock. 
type searchFunc func(stopCh <-chan struct{}, writeBlock writeBlockResultFunc) error func runPipes(qctx *QueryContext, pipes []pipe, search searchFunc, writeBlock writeBlockResultFunc, concurrency int) error { - ctx := qctx.Context + ctx, topCancel := context.WithCancel(qctx.Context) + defer topCancel() stopCh := ctx.Done() if len(pipes) == 0 { @@ -214,7 +226,7 @@ func runPipes(qctx *QueryContext, pipes []pipe, search searchFunc, writeBlock wr return search(stopCh, writeBlock) } - pp := newNoopPipeProcessor(writeBlock) + pp := newNoopPipeProcessor(stopCh, writeBlock) cancels := make([]func(), len(pipes)) pps := make([]pipeProcessor, len(pipes)) @@ -231,6 +243,10 @@ func runPipes(qctx *QueryContext, pipes []pipe, search searchFunc, writeBlock wr } errSearch := search(stopCh, pp.writeBlock) + if errSearch != nil { + // Cancel the whole query in order to free up resources occupied by pipes. + topCancel() + } var errFlush error for i, pp := range pps { @@ -242,6 +258,9 @@ func runPipes(qctx *QueryContext, pipes []pipe, search searchFunc, writeBlock wr } if err := pp.flush(); err != nil && errFlush == nil { + // Cancel the whole query in order to free up resources occupied by the remaining pipes. + topCancel() + errFlush = err } cancel := cancels[i] @@ -568,6 +587,86 @@ func (s *Storage) GetStreamIDs(qctx *QueryContext, limit uint64) ([]ValueWithHit return s.GetFieldValues(qctx, "_stream_id", limit) } +// GetTenantIDs returns tenantIDs for the given start and end. 
+func (s *Storage) GetTenantIDs(ctx context.Context, start, end int64) ([]TenantID, error) { + return s.getTenantIDs(ctx, start, end) +} + +func (s *Storage) getTenantIDs(ctx context.Context, start, end int64) ([]TenantID, error) { + workersCount := cgroup.AvailableCPUs() + stopCh := ctx.Done() + + tenantIDByWorker := make([][]TenantID, workersCount) + + // spin up workers + var wg sync.WaitGroup + workCh := make(chan *partition, workersCount) + for i := 0; i < workersCount; i++ { + wg.Add(1) + go func(workerID uint) { + defer wg.Done() + for pt := range workCh { + if needStop(stopCh) { + // The search has been canceled. Just skip all the scheduled work in order to save CPU time. + continue + } + tenantIDs := pt.idb.searchTenants() + tenantIDByWorker[workerID] = append(tenantIDByWorker[workerID], tenantIDs...) + } + }(uint(i)) + } + + // Select partitions according to the selected time range + s.partitionsLock.Lock() + ptws := s.partitions + minDay := start / nsecsPerDay + n := sort.Search(len(ptws), func(i int) bool { + return ptws[i].day >= minDay + }) + ptws = ptws[n:] + maxDay := end / nsecsPerDay + n = sort.Search(len(ptws), func(i int) bool { + return ptws[i].day > maxDay + }) + ptws = ptws[:n] + + // Copy the selected partitions, so they don't interfere with s.partitions. + ptws = append([]*partitionWrapper{}, ptws...) + + for _, ptw := range ptws { + ptw.incRef() + } + s.partitionsLock.Unlock() + + // Schedule concurrent search across matching partitions. 
+ for _, ptw := range ptws { + workCh <- ptw.pt + } + + // Wait until workers finish their work + close(workCh) + wg.Wait() + + // Decrement references to partitions + for _, ptw := range ptws { + ptw.decRef() + } + + uniqTenantIDs := make(map[TenantID]struct{}) + for _, tenantIDs := range tenantIDByWorker { + for _, tenantID := range tenantIDs { + uniqTenantIDs[tenantID] = struct{}{} + } + } + + tenants := make([]TenantID, 0, len(uniqTenantIDs)) + for k := range uniqTenantIDs { + tenants = append(tenants, k) + } + + return tenants, nil +} + func (s *Storage) runValuesWithHitsQuery(qctx *QueryContext) ([]ValueWithHits, error) { var results []ValueWithHits var resultsLock sync.Mutex @@ -631,10 +730,10 @@ func initSubqueries(qctx *QueryContext, runQuery runQueryFunc, keepInSubquery bo } qNew = initUnionQueries(qNew, runUnionQuery) - return initStreamContextPipes(qctx.QueryStats, qNew, runQuery) + return initStreamContextPipes(qctx, qNew, runQuery) } -func initStreamContextPipes(qs *QueryStats, q *Query, runQuery runQueryFunc) (*Query, error) { +func initStreamContextPipes(qctx *QueryContext, q *Query, runQuery runQueryFunc) (*Query, error) { pipes := q.pipes if len(pipes) == 0 { @@ -652,7 +751,7 @@ func initStreamContextPipes(qs *QueryStats, q *Query, runQuery runQueryFunc) (*Q fieldsFilter := getNeededColumns(pipes) pipesNew := append([]pipe{}, pipes...) - pipesNew[0] = pc.withRunQuery(qs, runQuery, fieldsFilter) + pipesNew[0] = pc.withRunQuery(qctx, runQuery, fieldsFilter) qNew := q.cloneShallow() qNew.pipes = pipesNew return qNew, nil @@ -1123,8 +1222,8 @@ func (db *DataBlock) initFromBlockResult(br *blockResult) { // search searches for the matching rows according to so. // -// It calls writeBlock for each matching block. 
-func (s *Storage) search(workersCount int, so *genericSearchOptions, qs *QueryStats, stopCh <-chan struct{}, writeBlock writeBlockResultFunc) { +// It uses workersCount parallel workers for the search and calls writeBlock for each matching block. +func (s *Storage) searchParallel(workersCount int, so *genericSearchOptions, qs *QueryStats, stopCh <-chan struct{}, writeBlock writeBlockResultFunc) { // Spin up workers var wgWorkers sync.WaitGroup workCh := make(chan *blockSearchWorkBatch, workersCount) diff --git a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/stream_tags.go b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/stream_tags.go index 22ceeecdf..d8910c027 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/stream_tags.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/stream_tags.go @@ -142,6 +142,10 @@ func (st *StreamTags) UnmarshalCanonical(src []byte) ([]byte, error) { st.Add(sName, sValue) } + if !sort.IsSorted(st) { + return srcOrig, fmt.Errorf("stream tags must be sorted in alphabetical order; got unsorted: %s", st) + } + return src, nil } diff --git a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/syslog_parser.go b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/syslog_parser.go index 5a202ef26..52baf91f1 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/syslog_parser.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/syslog_parser.go @@ -84,6 +84,21 @@ func (p *SyslogParser) resetFields() { p.sdParser.reset() } +func (p *SyslogParser) AddMessageField(s string) { + if !strings.HasPrefix(s, "CEF:") { + p.AddField("message", s) + return + } + + s = strings.TrimPrefix(s, "CEF:") + fields := p.Fields + if p.parseCEFMessage(s) { + return + } + p.Fields = fields + p.AddField("message", s) +} + // AddField adds name=value log field to p.Fields. 
func (p *SyslogParser) AddField(name, value string) { p.Fields = append(p.Fields, Field{ @@ -298,7 +313,7 @@ func (p *SyslogParser) parseRFC5424(s string) { s = tail // Parse message - p.AddField("message", s) + p.AddMessageField(s) } func (p *SyslogParser) parseRFC5424SD(s string) (string, bool) { @@ -417,14 +432,14 @@ func (p *SyslogParser) parseRFC3164(s string) { // Parse timestamp: prefer classic RFC3164 n := len(time.Stamp) if len(s) < n { - p.AddField("message", s) + p.AddMessageField(s) return } if s[len("2006-01-02")] != 'T' { // Parse RFC3164 timestamp. if !p.tryParseTimestampRFC3164(s[:n]) { - p.AddField("message", s) + p.AddMessageField(s) return } } else { @@ -432,11 +447,11 @@ func (p *SyslogParser) parseRFC3164(s string) { // See https://github.com/VictoriaMetrics/VictoriaLogs/issues/303 n = strings.IndexByte(s, ' ') if n < 0 { - p.AddField("message", s) + p.AddMessageField(s) return } if !p.tryParseTimestampRFC3339Nano(s[:n]) { - p.AddField("message", s) + p.AddMessageField(s) return } } @@ -445,7 +460,7 @@ func (p *SyslogParser) parseRFC3164(s string) { if len(s) == 0 || s[0] != ' ' { // Missing space after the time field if len(s) > 0 { - p.AddField("message", s) + p.AddMessageField(s) } return } @@ -481,7 +496,8 @@ func (p *SyslogParser) parseRFC3164(s string) { p.AddField("app_name", s) return } - p.AddField("app_name", s[:n]) + appName := s[:n] + p.AddField("app_name", appName) s = s[n:] // Parse proc_id @@ -503,8 +519,107 @@ func (p *SyslogParser) parseRFC3164(s string) { s = strings.TrimPrefix(s, " ") if len(s) > 0 { - p.AddField("message", s) + if appName == "CEF" { + fields := p.Fields + if p.parseCEFMessage(s) { + return + } + p.Fields = fields + } + p.AddMessageField(s) + } +} + +// parseCEFMessage parses CEF message. 
See https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors-8.3/cef-implementation-standard/Content/CEF/Chapter%201%20What%20is%20CEF.htm +func (p *SyslogParser) parseCEFMessage(s string) bool { + // Parse CEF version + n := nextUnescapedChar(s, '|') + if n < 0 { + return false + } + p.AddField("cef.version", unescapeCEFValue(s[:n])) + s = s[n+1:] + + // Parse device_vendor + n = nextUnescapedChar(s, '|') + if n < 0 { + return false + } + p.AddField("cef.device_vendor", unescapeCEFValue(s[:n])) + s = s[n+1:] + + // Parse device_product + n = nextUnescapedChar(s, '|') + if n < 0 { + return false + } + p.AddField("cef.device_product", unescapeCEFValue(s[:n])) + s = s[n+1:] + + // Parse device_version + n = nextUnescapedChar(s, '|') + if n < 0 { + return false + } + p.AddField("cef.device_version", unescapeCEFValue(s[:n])) + s = s[n+1:] + + // Parse device_event_class_id + n = nextUnescapedChar(s, '|') + if n < 0 { + return false + } + p.AddField("cef.device_event_class_id", unescapeCEFValue(s[:n])) + s = s[n+1:] + + // Parse name + n = nextUnescapedChar(s, '|') + if n < 0 { + return false + } + p.AddField("cef.name", unescapeCEFValue(s[:n])) + s = s[n+1:] + + // Parse severity + n = nextUnescapedChar(s, '|') + if n < 0 { + return false } + p.AddField("cef.severity", unescapeCEFValue(s[:n])) + s = s[n+1:] + + // Parse extension + return p.parseCEFExtension(s) +} + +func (p *SyslogParser) parseCEFExtension(s string) bool { + if s == "" { + return true + } + for { + // Parse key name + n := nextUnescapedChar(s, '=') + if n < 0 { + return false + } + keyName := "cef.extension." 
+ unescapeCEFValue(s[:n]) + s = s[n+1:] + + // Parse key value + n = nextUnescapedChar(s, '=') + if n < 0 { + p.AddField(keyName, s) + return true + } + + n = strings.LastIndexByte(s[:n], ' ') + if n < 0 { + return false + } + p.AddField(keyName, unescapeCEFValue(s[:n])) + s = s[n+1:] + } + } func (p *SyslogParser) tryParseTimestampRFC3164(s string) bool { @@ -536,3 +651,59 @@ func (p *SyslogParser) tryParseTimestampRFC3339Nano(s string) bool { p.AddField("timestamp", bytesutil.ToUnsafeString(p.buf[bufLen:])) return true } + +func nextUnescapedChar(s string, c byte) int { + offset := 0 + for { + n := strings.IndexByte(s[offset:], c) + if n < 0 { + return -1 + } + offset += n + + if prevBackslashesCount(s, offset)%2 == 0 { + return offset + } + offset++ + } +} + +func unescapeCEFValue(s string) string { + n := strings.IndexByte(s, '\\') + if n < 0 { + return s + } + + b := make([]byte, 0, len(s)) + for { + b = append(b, s[:n]...) + n++ + if n >= len(s) { + b = append(b, '\\') + return string(b) + } + switch s[n] { + case 'n': + b = append(b, '\n') + case 'r': + b = append(b, '\r') + default: + b = append(b, s[n]) + } + s = s[n+1:] + + n = strings.IndexByte(s, '\\') + if n < 0 { + b = append(b, s...) 
+ return string(b) + } + } +} + +func prevBackslashesCount(s string, offset int) int { + offsetOrig := offset + for offset > 0 && s[offset-1] == '\\' { + offset-- + } + return offsetOrig - offset +} diff --git a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/values_encoder.go b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/values_encoder.go index 62db9f85c..8e91f200e 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/values_encoder.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/values_encoder.go @@ -1427,3 +1427,8 @@ const iso8601Timestamp = "2006-01-02T15:04:05.000Z" func marshalTimestampRFC3339NanoString(dst []byte, nsecs int64) []byte { return time.Unix(0, nsecs).UTC().AppendFormat(dst, time.RFC3339Nano) } + +// marshalTimestampRFC3339NanoPreciseString appends RFC3339-formatted nsecs with nanosecond precision to dst and returns the result. +func marshalTimestampRFC3339NanoPreciseString(dst []byte, nsecs int64) []byte { + return time.Unix(0, nsecs).UTC().AppendFormat(dst, "2006-01-02T15:04:05.000000000Z07:00") +} diff --git a/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/appmetrics/appmetrics.go b/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/appmetrics/appmetrics.go index de446167d..fe9af54d1 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/appmetrics/appmetrics.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/appmetrics/appmetrics.go @@ -4,7 +4,6 @@ import ( "flag" "fmt" "io" - "regexp" "strings" "sync" "sync/atomic" @@ -28,8 +27,6 @@ func initExposeMetadata() { metrics.ExposeMetadata(*exposeMetadata) } -var versionRe = regexp.MustCompile(`v\d+\.\d+\.\d+(?:-enterprise)?(?:-cluster)?`) - // WritePrometheusMetrics writes all the registered metrics to w in Prometheus exposition format. 
func WritePrometheusMetrics(w io.Writer) { exposeMetadataOnce.Do(initExposeMetadata) @@ -58,7 +55,7 @@ func writePrometheusMetrics(w io.Writer) { metrics.WritePrometheus(w, true) metrics.WriteFDMetrics(w) - metrics.WriteGaugeUint64(w, fmt.Sprintf("vm_app_version{version=%q, short_version=%q}", buildinfo.Version, versionRe.FindString(buildinfo.Version)), 1) + metrics.WriteGaugeUint64(w, fmt.Sprintf("vm_app_version{version=%q, short_version=%q}", buildinfo.Version, buildinfo.ShortVersion()), 1) metrics.WriteGaugeUint64(w, "vm_allowed_memory_bytes", uint64(memory.Allowed())) metrics.WriteGaugeUint64(w, "vm_available_memory_bytes", uint64(memory.Allowed()+memory.Remaining())) metrics.WriteGaugeUint64(w, "vm_available_cpu_cores", uint64(cgroup.AvailableCPUs())) diff --git a/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo/version.go b/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo/version.go index 21b63d461..a54483bf9 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo/version.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo/version.go @@ -4,6 +4,7 @@ import ( "flag" "fmt" "os" + "regexp" ) var version = flag.Bool("version", false, "Show VictoriaMetrics version") @@ -11,6 +12,13 @@ var version = flag.Bool("version", false, "Show VictoriaMetrics version") // Version must be set via -ldflags '-X' var Version string +var shortVersionRe = regexp.MustCompile(`v\d+\.\d+\.\d+(?:-enterprise)?(?:-cluster)?`) + +// ShortVersion returns a shortened version +func ShortVersion() string { + return shortVersionRe.FindString(Version) +} + // Init must be called after flag.Parse call. 
func Init() { if *version { diff --git a/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/dir_remover.go b/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/dir_remover.go index 4e74bf9d2..ad96bb4a8 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/dir_remover.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/dir_remover.go @@ -76,9 +76,15 @@ func MustRemoveDir(dirPath string) { // so they are no longer visible after unclean shutdown. MustSyncPath(dirPath) - // Remove the deleteDirFilename file + // Remove the deleteDirFilename file, since there are no other entries left in the directory. MustRemovePath(deleteFilePath) + // Sync the directory after the removing deletDirFilename file in order to make sure + // all the metadata files are removed at some exotic filesystems such as OSSFS2. + // See https://github.com/VictoriaMetrics/VictoriaLogs/issues/649 + // and https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9709 + MustSyncPath(dirPath) + // Remove the dirPath itself MustRemovePath(dirPath) diff --git a/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool/timerpool.go b/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool/timerpool.go index 746f458e0..1b2c78e51 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool/timerpool.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool/timerpool.go @@ -25,13 +25,7 @@ func Get(d time.Duration) *time.Timer { // // t cannot be accessed after returning to the pool. func Put(t *time.Timer) { - if !t.Stop() { - // Drain t.C if it wasn't obtained by the caller yet. 
- select { - case <-t.C: - default: - } - } + t.Stop() timerPool.Put(t) } diff --git a/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter/concurrencylimiter.go b/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter/concurrencylimiter.go index 580540850..f0ddf335a 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter/concurrencylimiter.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter/concurrencylimiter.go @@ -115,12 +115,11 @@ func incConcurrency() bool { concurrencyLimitReached.Inc() t := timerpool.Get(*maxQueueDuration) + defer timerpool.Put(t) select { case concurrencyLimitCh <- struct{}{}: - timerpool.Put(t) return true case <-t.C: - timerpool.Put(t) concurrencyLimitTimeout.Inc() return false } diff --git a/vendor/github.com/VictoriaMetrics/metricsql/utils.go b/vendor/github.com/VictoriaMetrics/metricsql/utils.go index bbc6a95d5..a0fb91346 100644 --- a/vendor/github.com/VictoriaMetrics/metricsql/utils.go +++ b/vendor/github.com/VictoriaMetrics/metricsql/utils.go @@ -141,7 +141,8 @@ func checkSupportedFunctions(e Expr) error { if !ok { return } - if !IsRollupFunc(fe.Name) && !IsTransformFunc(fe.Name) { + + if !IsSupportedFunction(fe.Name) { err = fmt.Errorf("unsupported function %q", fe.Name) } }) diff --git a/vendor/modules.txt b/vendor/modules.txt index f50564bc4..7c00b4f0d 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,8 +1,8 @@ -# github.com/VictoriaMetrics/VictoriaLogs v1.33.1 -## explicit; go 1.25.0 +# github.com/VictoriaMetrics/VictoriaLogs v1.35.1-0.20250927125409-7c4c8381c2d8 +## explicit; go 1.25.1 github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage github.com/VictoriaMetrics/VictoriaLogs/lib/prefixfilter -# github.com/VictoriaMetrics/VictoriaMetrics v0.0.0-20250903201027-a0a33f0ce1c2 +# github.com/VictoriaMetrics/VictoriaMetrics v0.0.0-20250917082640-2c72ef0f3871 ## explicit; go 1.25.0 
github.com/VictoriaMetrics/VictoriaMetrics/lib/appmetrics github.com/VictoriaMetrics/VictoriaMetrics/lib/atomicutil @@ -52,7 +52,7 @@ github.com/VictoriaMetrics/fastcache # github.com/VictoriaMetrics/metrics v1.40.1 ## explicit; go 1.18 github.com/VictoriaMetrics/metrics -# github.com/VictoriaMetrics/metricsql v0.84.7 +# github.com/VictoriaMetrics/metricsql v0.84.8 ## explicit; go 1.24.2 github.com/VictoriaMetrics/metricsql github.com/VictoriaMetrics/metricsql/binaryop From 352dc5aa39c9a9ebb352a7387c3fb5d1f160da94 Mon Sep 17 00:00:00 2001 From: Jiekun Date: Sun, 28 Sep 2025 15:07:22 +0800 Subject: [PATCH 11/26] feature: [dependency] add more comments to the dependency functions --- .../backgroundtask/servicegraph.go | 24 +++++---- app/vtinsert/opentelemetry/opentelemetry.go | 2 +- app/vtselect/traces/jaeger/jaeger.go | 8 +-- app/vtselect/traces/query/query.go | 50 ++++++++++++++----- .../opentelemetry/pb/internal_fields.go | 20 ++++++++ .../opentelemetry/pb/trace_fields.go | 9 ---- 6 files changed, 77 insertions(+), 36 deletions(-) create mode 100644 lib/protoparser/opentelemetry/pb/internal_fields.go diff --git a/app/victoria-traces/backgroundtask/servicegraph.go b/app/victoria-traces/backgroundtask/servicegraph.go index 2da542e3d..48d6f610d 100644 --- a/app/victoria-traces/backgroundtask/servicegraph.go +++ b/app/victoria-traces/backgroundtask/servicegraph.go @@ -15,10 +15,11 @@ import ( ) var ( - enableServiceGraph = flag.Bool("servicegraph.enable", false, "Whether to enable background task for generating service graph. It should only be enabled on VictoriaTraces single-node or vtstorage.") - serviceGraphInterval = flag.Duration("servicegraph.taskInterval", time.Minute, "The background task interval for generating service graph data. It requires setting `-servicegraph.enable=true`.") - serviceGraphTaskTimeout = flag.Duration("servicegraph.taskTimeout", 30*time.Second, "The background task timeout duration for generating service graph data. 
It requires setting `-servicegraph.enable=true`.") - serviceGraphLookbehind = flag.Duration("servicegraph.lookbehind", time.Minute, "The lookbehind window for each time service graph background task run. It requires setting `-servicegraph.enable=true`.") + enableServiceGraph = flag.Bool("servicegraph.enable", false, "Whether to enable background task for generating service graph. It should only be enabled on VictoriaTraces single-node or vtstorage.") + serviceGraphTaskInterval = flag.Duration("servicegraph.taskInterval", time.Minute, "The background task interval for generating service graph data. It requires setting `-servicegraph.enable=true`.") + serviceGraphTaskTimeout = flag.Duration("servicegraph.taskTimeout", 30*time.Second, "The background task timeout duration for generating service graph data. It requires setting `-servicegraph.enable=true`.") + serviceGraphTaskLookbehind = flag.Duration("servicegraph.taskLookbehind", time.Minute, "The lookbehind window for each time service graph background task run. It requires setting `-servicegraph.enable=true`.") + serviceGraphTaskLimit = flag.Uint64("servicegraph.taskLimit", 1000, "How many service graph relations each task could fetch for each tenant. 
It requires setting `-servicegraph.enable=true`.") ) var ( @@ -51,8 +52,8 @@ func newServiceGraphTask() *serviceGraphTask { } func (sgt *serviceGraphTask) Start() { - logger.Infof("starting background task for service graph, interval: %v, lookbehind: %v", *serviceGraphInterval, *serviceGraphLookbehind) - ticker := time.NewTicker(*serviceGraphInterval) + logger.Infof("starting background task for service graph, interval: %v, lookbehind: %v", *serviceGraphTaskInterval, *serviceGraphTaskLookbehind) + ticker := time.NewTicker(*serviceGraphTaskInterval) go func() { for { select { @@ -74,8 +75,8 @@ func (sgt *serviceGraphTask) Stop() { } func GenerateServiceGraphTimeRange(ctx context.Context) { - endTime := time.Now().Truncate(*serviceGraphInterval) - startTime := endTime.Add(-*serviceGraphLookbehind) + endTime := time.Now().Truncate(*serviceGraphTaskInterval) + startTime := endTime.Add(-*serviceGraphTaskLookbehind) tenantIDs, err := vtstorage.GetTenantIDsByTimeRange(ctx, startTime.UnixNano(), endTime.UnixNano()) if err != nil { @@ -85,10 +86,14 @@ func GenerateServiceGraphTimeRange(ctx context.Context) { // query and persist operations are executed sequentially, which helps not to consume excessive resources. for _, tenantID := range tenantIDs { + // Build a fake HTTP *Request. It helps align the way of handling tenant-related input + // in both vtselect and vtinsert, as they assume tenant info will exist in HTTP headers. 
r, _ := http.NewRequestWithContext(ctx, "", "", nil) r.Header.Set("AccountID", strconv.FormatUint(uint64(tenantID.AccountID), 10)) r.Header.Set("ProjectID", strconv.FormatUint(uint64(tenantID.ProjectID), 10)) - rows, err := vtselect.GetServiceGraphTimeRange(ctx, r, startTime, endTime) + + // query service graph relations + rows, err := vtselect.GetServiceGraphTimeRange(ctx, r, startTime, endTime, *serviceGraphTaskLimit) if err != nil { logger.Errorf("cannot get service graph for time range [%d, %d]: %s", startTime.Unix(), endTime.Unix(), err) return @@ -97,6 +102,7 @@ func GenerateServiceGraphTimeRange(ctx context.Context) { return } + // persist service graph relations err = vtinsert.PersistServiceGraph(ctx, r, rows, endTime) if err != nil { logger.Errorf("cannot presist service graph for time %d: %s", endTime.Unix(), err) diff --git a/app/vtinsert/opentelemetry/opentelemetry.go b/app/vtinsert/opentelemetry/opentelemetry.go index b0554a799..e6e89c81c 100644 --- a/app/vtinsert/opentelemetry/opentelemetry.go +++ b/app/vtinsert/opentelemetry/opentelemetry.go @@ -298,7 +298,7 @@ func PersistServiceGraph(ctx context.Context, r *http.Request, fields [][]logsto Name: "_msg", Value: "-", }) - lmp.AddRow(timestamp.UnixNano(), f, []logstorage.Field{{otelpb.TraceServiceGraphStreamName, "-"}}) + lmp.AddRow(timestamp.UnixNano(), f, []logstorage.Field{{otelpb.ServiceGraphStreamName, "-"}}) } lmp.MustClose() return nil diff --git a/app/vtselect/traces/jaeger/jaeger.go b/app/vtselect/traces/jaeger/jaeger.go index 6b78640b4..997dcd7a4 100644 --- a/app/vtselect/traces/jaeger/jaeger.go +++ b/app/vtselect/traces/jaeger/jaeger.go @@ -415,7 +415,7 @@ func processGetDependenciesRequest(ctx context.Context, w http.ResponseWriter, r return } - rows, err := query.GetDependencyList(ctx, cp, param) + rows, err := query.GetServiceGraphList(ctx, cp, param) if err != nil { httpserver.Errorf(w, r, "get dependencies error: %s", err) return @@ -455,12 +455,12 @@ func 
processGetDependenciesRequest(ctx context.Context, w http.ResponseWriter, r WriteGetDependenciesResponse(w, dependencies) } -// parseJaegerDependenciesQueryParam parse Jaeger request to unified DependenciesQueryParameters. -func parseJaegerDependenciesQueryParam(_ context.Context, r *http.Request) (*query.DependenciesQueryParameters, error) { +// parseJaegerDependenciesQueryParam parse Jaeger request to unified ServiceGraphQueryParameters. +func parseJaegerDependenciesQueryParam(_ context.Context, r *http.Request) (*query.ServiceGraphQueryParameters, error) { var err error // default params - p := &query.DependenciesQueryParameters{ + p := &query.ServiceGraphQueryParameters{ EndTs: time.Now(), Lookback: time.Minute * 1, } diff --git a/app/vtselect/traces/query/query.go b/app/vtselect/traces/query/query.go index be712e568..4b6d6000e 100644 --- a/app/vtselect/traces/query/query.go +++ b/app/vtselect/traces/query/query.go @@ -567,14 +567,27 @@ func checkTraceIDList(traceIDList []string) []string { return result } -type DependenciesQueryParameters struct { +type ServiceGraphQueryParameters struct { EndTs time.Time Lookback time.Duration } -// GetDependencyList returns service dependencies graph edges (parent, child, callCount) in []*Row format. -func GetDependencyList(ctx context.Context, cp *CommonParams, param *DependenciesQueryParameters) ([]*Row, error) { - qStr := `{trace_service_graph_stream="-"} | fields parent, child, callCount | stats by (parent, child) sum(callCount) as callCount` +// GetServiceGraphList returns service dependencies graph edges (parent, child, callCount) in []*Row format. +// +// TODO: currently this function can only handle request from Jaeger dependencies API. Since Tempo provides similar service graph +// feature, it would be great to add support for Tempo service graph API as well. 
+func GetServiceGraphList(ctx context.Context, cp *CommonParams, param *ServiceGraphQueryParameters) ([]*Row, error) { + // {trace_service_graph_stream="-"} | fields parent, child, callCount | stats by (parent, child) sum(callCount) as callCount + qStr := fmt.Sprintf(`{%s="-"} | fields %s, %s, %s | stats by (%s, %s) sum(%s) as %s`, + otelpb.ServiceGraphStreamName, + otelpb.ServiceGraphParentFieldName, + otelpb.ServiceGraphChildFieldName, + otelpb.ServiceGraphCallCountFieldName, + otelpb.ServiceGraphParentFieldName, + otelpb.ServiceGraphChildFieldName, + otelpb.ServiceGraphCallCountFieldName, + otelpb.ServiceGraphCallCountFieldName, + ) startTime := param.EndTs.Add(-param.Lookback).UnixNano() endTime := param.EndTs.UnixNano() q, err := logstorage.ParseQueryAtTimestamp(qStr, endTime) @@ -631,15 +644,17 @@ func GetDependencyList(ctx context.Context, cp *CommonParams, param *Dependencie return rows, nil } -func GetServiceGraphTimeRange(ctx context.Context, r *http.Request, startTime, endTime time.Time) ([][]logstorage.Field, error) { +// GetServiceGraphTimeRange calculate the service graph relation within the time range in (parent, child, callCount) format. +func GetServiceGraphTimeRange(ctx context.Context, r *http.Request, startTime, endTime time.Time, limit uint64) ([][]logstorage.Field, error) { cp, err := GetCommonParams(r) if err != nil { return nil, err } + // (NOT parent_span_id:"") AND (kind:~"2|5") | fields parent_span_id, resource_attr:service.name | rename parent_span_id as span_id, resource_attr:service.name as child qStrChildSpans := fmt.Sprintf( - `(NOT %s:"") AND (%s:~"%d|%d") | fields %s, %s | rename %s as %s, %s as child`, - otelpb.ParentSpanIDField, - otelpb.KindField, + `(NOT %s:"") AND (%s:~"%d|%d") | fields %s, %s | rename %s as %s, %s as %s`, + otelpb.ParentSpanIDField, // parent span id not empty means this span is a child span. + otelpb.KindField, // only server(2) and consumer(5) span could be used as a child. 
It helps reduce the spans it needs to fetch. otelpb.SpanKind(2), otelpb.SpanKind(5), otelpb.ParentSpanIDField, @@ -647,22 +662,31 @@ func GetServiceGraphTimeRange(ctx context.Context, r *http.Request, startTime, e otelpb.ParentSpanIDField, otelpb.SpanIDField, otelpb.ResourceAttrServiceName, + otelpb.ServiceGraphChildFieldName, ) + // (NOT span_id:"") AND (kind:~"3|4") | fields span_id, resource_attr:service.name | rename resource_attr:service.name as parent qStrParentSpans := fmt.Sprintf( - `(NOT %s:"") AND (%s:~"%d|%d") | fields %s, %s | rename %s as parent`, - otelpb.SpanIDField, - otelpb.KindField, + `(NOT %s:"") AND (%s:~"%d|%d") | fields %s, %s | rename %s as %s`, + otelpb.SpanIDField, // Any span could be a parent span, as long as it has a span ID. + otelpb.KindField, // only client(3) and producer(4) span could be used as a parent. It helps reduce the spans it needs to fetch. otelpb.SpanKind(3), otelpb.SpanKind(4), otelpb.SpanIDField, otelpb.ResourceAttrServiceName, otelpb.ResourceAttrServiceName, + otelpb.ServiceGraphParentFieldName, ) + // join by span_id qStr := fmt.Sprintf( - `%s | join by (%s) (%s) inner | NOT parent:eq_field(child) | stats by (parent, child) count() callCount`, + `%s | join by (%s) (%s) inner | NOT %s:eq_field(%s) | stats by (%s, %s) count() %s`, qStrChildSpans, otelpb.SpanIDField, qStrParentSpans, + otelpb.ServiceGraphParentFieldName, + otelpb.ServiceGraphChildFieldName, + otelpb.ServiceGraphParentFieldName, + otelpb.ServiceGraphChildFieldName, + otelpb.ServiceGraphCallCountFieldName, ) q, err := logstorage.ParseQueryAtTimestamp(qStr, endTime.UnixNano()) @@ -670,7 +694,7 @@ func GetServiceGraphTimeRange(ctx context.Context, r *http.Request, startTime, e return nil, fmt.Errorf("cannot parse query [%s]: %s", qStr, err) } q.AddTimeFilter(startTime.UnixNano(), endTime.UnixNano()) - q.AddPipeOffsetLimit(0, 1000) + q.AddPipeOffsetLimit(0, limit) cp.Query = q qctx := cp.NewQueryContext(ctx) diff --git 
a/lib/protoparser/opentelemetry/pb/internal_fields.go b/lib/protoparser/opentelemetry/pb/internal_fields.go new file mode 100644 index 000000000..64edc4b8a --- /dev/null +++ b/lib/protoparser/opentelemetry/pb/internal_fields.go @@ -0,0 +1,20 @@ +package pb + +// internal_fields.go contains the stream names/values, field names/values that VictoriaTraces required/generated. +// +// They're NOT part of the OpenTelemetry standard. + +// TraceID index stream and fields +const ( + TraceIDIndexStreamName = "trace_id_idx_stream" + TraceIDIndexFieldName = "trace_id_idx" + TraceIDIndexPartitionCount = uint64(1024) +) + +// service graph stream and fields +const ( + ServiceGraphStreamName = "trace_service_graph_stream" + ServiceGraphParentFieldName = "parent" + ServiceGraphChildFieldName = "child" + ServiceGraphCallCountFieldName = "call_count" +) diff --git a/lib/protoparser/opentelemetry/pb/trace_fields.go b/lib/protoparser/opentelemetry/pb/trace_fields.go index 4a9b6a34d..5709c9e06 100644 --- a/lib/protoparser/opentelemetry/pb/trace_fields.go +++ b/lib/protoparser/opentelemetry/pb/trace_fields.go @@ -2,15 +2,6 @@ package pb // trace_fields.go contains field names when storing OTLP trace span data in VictoriaLogs. 
-// Special: TraceID index stream and fields -const ( - TraceIDIndexStreamName = "trace_id_idx_stream" - TraceIDIndexFieldName = "trace_id_idx" - TraceIDIndexPartitionCount = uint64(1024) - - TraceServiceGraphStreamName = "trace_service_graph_stream" -) - // Resource const ( ResourceAttrPrefix = "resource_attr:" From c84411c31e5d811b0ff96325d52f0c485bc2c81d Mon Sep 17 00:00:00 2001 From: Jiekun Date: Sun, 28 Sep 2025 15:11:12 +0800 Subject: [PATCH 12/26] feature: [dependency] make linter happy --- app/victoria-traces/backgroundtask/servicegraph.go | 6 ------ app/vtinsert/opentelemetry/opentelemetry.go | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/app/victoria-traces/backgroundtask/servicegraph.go b/app/victoria-traces/backgroundtask/servicegraph.go index 48d6f610d..27a1f6c6f 100644 --- a/app/victoria-traces/backgroundtask/servicegraph.go +++ b/app/victoria-traces/backgroundtask/servicegraph.go @@ -31,14 +31,12 @@ func Init() { sgt = newServiceGraphTask() sgt.Start() } - return } func Stop() { if *enableServiceGraph { sgt.Stop() } - return } type serviceGraphTask struct { @@ -66,12 +64,10 @@ func (sgt *serviceGraphTask) Start() { } } }() - return } func (sgt *serviceGraphTask) Stop() { close(sgt.stopCh) - return } func GenerateServiceGraphTimeRange(ctx context.Context) { @@ -108,6 +104,4 @@ func GenerateServiceGraphTimeRange(ctx context.Context) { logger.Errorf("cannot presist service graph for time %d: %s", endTime.Unix(), err) } } - - return } diff --git a/app/vtinsert/opentelemetry/opentelemetry.go b/app/vtinsert/opentelemetry/opentelemetry.go index e6e89c81c..c0f9b1c3e 100644 --- a/app/vtinsert/opentelemetry/opentelemetry.go +++ b/app/vtinsert/opentelemetry/opentelemetry.go @@ -298,7 +298,7 @@ func PersistServiceGraph(ctx context.Context, r *http.Request, fields [][]logsto Name: "_msg", Value: "-", }) - lmp.AddRow(timestamp.UnixNano(), f, []logstorage.Field{{otelpb.ServiceGraphStreamName, "-"}}) + lmp.AddRow(timestamp.UnixNano(), f, 
[]logstorage.Field{{Name: otelpb.ServiceGraphStreamName, Value: "-"}}) } lmp.MustClose() return nil From 827912a30dbd58abfad22328c6d7085bc4cacc94 Mon Sep 17 00:00:00 2001 From: Jiekun Date: Sun, 28 Sep 2025 16:26:27 +0800 Subject: [PATCH 13/26] feature: [dependency] add integration test, and fix the call count field name --- apptest/client.go | 2 +- apptest/model.go | 49 ++++- apptest/tests/service_graph_test.go | 203 ++++++++++++++++++ apptest/vtsingle.go | 34 ++- .../opentelemetry/pb/internal_fields.go | 2 +- 5 files changed, 278 insertions(+), 12 deletions(-) create mode 100644 apptest/tests/service_graph_test.go diff --git a/apptest/client.go b/apptest/client.go index 01ee9761b..14099d4ed 100644 --- a/apptest/client.go +++ b/apptest/client.go @@ -149,7 +149,7 @@ func (app *ServesMetrics) GetMetric(t *testing.T, metricName string) float64 { return res } } - t.Fatalf("metric not found: %s", metricName) + t.Logf("metric not found: %s", metricName) return 0 } diff --git a/apptest/model.go b/apptest/model.go index 9cd59cb07..1a05c9db5 100644 --- a/apptest/model.go +++ b/apptest/model.go @@ -63,7 +63,7 @@ type JaegerQuerier interface { JaegerAPIOperations(t *testing.T, serviceName string, opts QueryOpts) *JaegerAPIOperationsResponse JaegerAPITraces(t *testing.T, params JaegerQueryParam, opts QueryOpts) *JaegerAPITracesResponse JaegerAPITrace(t *testing.T, traceID string, opts QueryOpts) *JaegerAPITraceResponse - JaegerAPIDependencies(t *testing.T, opts QueryOpts) + JaegerAPIDependencies(t *testing.T, params JaegerDependenciesParam, opts QueryOpts) *JaegerAPIDependenciesResponse } // OTLPTracesWriter contains methods for writing OTLP trace data. @@ -259,3 +259,50 @@ func NewJaegerAPITraceResponse(t *testing.T, s string) *JaegerAPITraceResponse { } return res } + +// NewJaegerAPIDependenciesResponse is a test helper function that creates a new +// instance of JaegerAPIDependenciesResponse by unmarshalling a json string. 
+func NewJaegerAPIDependenciesResponse(t *testing.T, s string) *JaegerAPIDependenciesResponse { + t.Helper() + + res := &JaegerAPIDependenciesResponse{} + if err := json.Unmarshal([]byte(s), res); err != nil { + t.Fatalf("could not unmarshal query response data=\n%s\n: %v", string(s), err) + } + return res +} + +// JaegerDependenciesParam is a helper structure for implementing extra +// helper functions of `query.ServiceGraphQueryParameters`. +type JaegerDependenciesParam struct { + query.ServiceGraphQueryParameters +} + +// asURLValues add non-empty jaeger dependencies params as URL values. +func (jqp *JaegerDependenciesParam) asURLValues() url.Values { + uv := make(url.Values) + addNonEmpty := func(name string, values ...string) { + for _, value := range values { + if len(value) == 0 { + continue + } + uv.Add(name, value) + } + } + + addNonEmpty("endTs", strconv.FormatInt(jqp.EndTs.UnixMilli(), 10)) + addNonEmpty("lookback", strconv.FormatInt(jqp.Lookback.Milliseconds(), 10)) + + return uv +} + +type JaegerAPIDependenciesResponse struct { + Data []DependenciesResponseData `json:"data"` + JaegerResponse +} + +type DependenciesResponseData struct { + Parent string `json:"parent"` + Child string `json:"child"` + CallCount int `json:"callCount"` +} diff --git a/apptest/tests/service_graph_test.go b/apptest/tests/service_graph_test.go new file mode 100644 index 000000000..5ff8dc27c --- /dev/null +++ b/apptest/tests/service_graph_test.go @@ -0,0 +1,203 @@ +package tests + +import ( + "os" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + + "github.com/VictoriaMetrics/VictoriaTraces/app/vtselect/traces/query" + at "github.com/VictoriaMetrics/VictoriaTraces/apptest" + otelpb "github.com/VictoriaMetrics/VictoriaTraces/lib/protoparser/opentelemetry/pb" +) + +// TestSingleServiceGraphGenerationJaegerQuery test service graph data generation +// and query of `/select/jaeger/api/dependencies` API for vt-single. 
+func TestSingleServiceGraphGenerationJaegerQuery(t *testing.T) { + os.RemoveAll(t.Name()) + + tc := at.NewTestCase(t) + defer tc.Stop() + + sut := tc.MustStartVtsingle("vtsingle", []string{ + "-storageDataPath=" + tc.Dir() + "/vtsingle", + "-retentionPeriod=100y", + "-servicegraph.enable=true", + "-servicegraph.taskInterval=1s", + }) + + testServiceGraphGenerationJaegerQuery(tc, sut) +} + +func testServiceGraphGenerationJaegerQuery(tc *at.TestCase, sut at.VictoriaTracesWriteQuerier) { + t := tc.T() + + parentServiceName, childServiceName := prepareTraceParentAndChildSpanData(tc, sut) + + // wait for service graph data to be generated + tc.Assert(&at.AssertOptions{ + Msg: "service graph data not generated", + Got: func() any { + return getServiceGraphRowsInsertedTotal(t, sut) >= 1 + }, + Want: true, + Retries: 10, + Period: time.Second, + }) + sut.ForceFlush(t) + + // verify the service graph relations via /select/jaeger/api/dependencies + tc.Assert(&at.AssertOptions{ + Msg: "unexpected /select/jaeger/api/dependencies response", + Got: func() any { + return sut.JaegerAPIDependencies(t, at.JaegerDependenciesParam{ + ServiceGraphQueryParameters: query.ServiceGraphQueryParameters{ + EndTs: time.Now(), + Lookback: time.Minute, + }, + }, at.QueryOpts{}) + }, + Want: &at.JaegerAPIDependenciesResponse{ + Data: []at.DependenciesResponseData{ + { + Parent: parentServiceName, + Child: childServiceName, + CallCount: 1, + }, + }, + }, + CmpOpts: []cmp.Option{ + cmpopts.IgnoreFields(at.JaegerAPIDependenciesResponse{}, "Errors", "Limit", "Offset", "Total"), + cmpopts.IgnoreFields(at.DependenciesResponseData{}, "CallCount"), + }, + }) +} + +func prepareTraceParentAndChildSpanData(tc *at.TestCase, sut at.VictoriaTracesWriteQuerier) (string, string) { + t := tc.T() + + // important data required by verification. + parentServiceName := "testServiceGraphServiceNameParent" + childServiceName := "testServiceGraphServiceNameChild" + + // prepare test data for ingestion and assertion. 
+ parentSpanID := "987654321" + childSpanID := "9876543210" + + spanName := "testKeyIngestQuerySpan" + traceID := "123456789" + testTagValue := "testValue" + testTag := []*otelpb.KeyValue{ + { + Key: "testTag", + Value: &otelpb.AnyValue{ + StringValue: &testTagValue, + }, + }, + } + spanTime := time.Now() + + parentSpanReq := &otelpb.ExportTraceServiceRequest{ + ResourceSpans: []*otelpb.ResourceSpans{ + { + Resource: otelpb.Resource{ + Attributes: []*otelpb.KeyValue{ + { + Key: "service.name", + Value: &otelpb.AnyValue{ + StringValue: &parentServiceName, + }, + }, + }, + }, + ScopeSpans: []*otelpb.ScopeSpans{ + { + Scope: otelpb.InstrumentationScope{ + Name: "testInstrumentation", + Version: "1.0", + }, + Spans: []*otelpb.Span{ + { + TraceID: traceID, + SpanID: parentSpanID, + TraceState: "trace_state", + ParentSpanID: "", // root span + Flags: 1, + Name: spanName, + Kind: otelpb.SpanKind(3), // parent span must be 3 or 4, 3 means client + StartTimeUnixNano: uint64(spanTime.UnixNano()), + EndTimeUnixNano: uint64(spanTime.UnixNano()), + Attributes: testTag, + Events: []*otelpb.SpanEvent{}, + Links: []*otelpb.SpanLink{}, + Status: otelpb.Status{}, + }, + }, + }, + }, + }, + }, + } + + childSpanReq := &otelpb.ExportTraceServiceRequest{ + ResourceSpans: []*otelpb.ResourceSpans{ + { + Resource: otelpb.Resource{ + Attributes: []*otelpb.KeyValue{ + { + Key: "service.name", + Value: &otelpb.AnyValue{ + StringValue: &childServiceName, + }, + }, + }, + }, + ScopeSpans: []*otelpb.ScopeSpans{ + { + Scope: otelpb.InstrumentationScope{ + Name: "testInstrumentation", + Version: "1.0", + }, + Spans: []*otelpb.Span{ + { + TraceID: traceID, + SpanID: childSpanID, + TraceState: "trace_state", + ParentSpanID: parentSpanID, + Flags: 1, + Name: spanName, + Kind: otelpb.SpanKind(2), // child span must be 2 or 5, 2 means server + StartTimeUnixNano: uint64(spanTime.UnixNano()), + EndTimeUnixNano: uint64(spanTime.UnixNano()), + Attributes: testTag, + Events: []*otelpb.SpanEvent{}, + Links: 
[]*otelpb.SpanLink{}, + Status: otelpb.Status{}, + }, + }, + }, + }, + }, + }, + } + + // ingest data via /insert/opentelemetry/v1/traces + sut.OTLPExportTraces(t, parentSpanReq, at.QueryOpts{}) + sut.OTLPExportTraces(t, childSpanReq, at.QueryOpts{}) + return parentServiceName, childServiceName +} + +func getServiceGraphRowsInsertedTotal(t *testing.T, sut at.VictoriaTracesWriteQuerier) int { + t.Helper() + + selector := `vt_rows_ingested_total{type="internalinsert_servicegraph"}` + switch tt := sut.(type) { + case *at.Vtsingle: + return tt.GetIntMetric(t, selector) + default: + t.Fatalf("unexpected type: got %T, want *Vtsingle", sut) + } + return 0 +} diff --git a/apptest/vtsingle.go b/apptest/vtsingle.go index 56b291968..546f91100 100644 --- a/apptest/vtsingle.go +++ b/apptest/vtsingle.go @@ -23,10 +23,11 @@ type Vtsingle struct { forceFlushURL string forceMergeURL string - jaegerAPIServicesURL string - jaegerAPIOperationsURL string - jaegerAPITracesURL string - jaegerAPITraceURL string + jaegerAPIServicesURL string + jaegerAPIOperationsURL string + jaegerAPITracesURL string + jaegerAPITraceURL string + jaegerAPIDependenciesURL string otlpTracesURL string } @@ -61,10 +62,11 @@ func StartVtsingle(instance string, flags []string, cli *Client) (*Vtsingle, err forceFlushURL: fmt.Sprintf("http://%s/internal/force_flush", stderrExtracts[1]), forceMergeURL: fmt.Sprintf("http://%s/internal/force_merge", stderrExtracts[1]), - jaegerAPIServicesURL: fmt.Sprintf("http://%s/select/jaeger/api/services", stderrExtracts[1]), - jaegerAPIOperationsURL: fmt.Sprintf("http://%s/select/jaeger/api/services/%%s/operations", stderrExtracts[1]), - jaegerAPITracesURL: fmt.Sprintf("http://%s/select/jaeger/api/traces", stderrExtracts[1]), - jaegerAPITraceURL: fmt.Sprintf("http://%s/select/jaeger/api/traces/%%s", stderrExtracts[1]), + jaegerAPIServicesURL: fmt.Sprintf("http://%s/select/jaeger/api/services", stderrExtracts[1]), + jaegerAPIOperationsURL: 
fmt.Sprintf("http://%s/select/jaeger/api/services/%%s/operations", stderrExtracts[1]), + jaegerAPITracesURL: fmt.Sprintf("http://%s/select/jaeger/api/traces", stderrExtracts[1]), + jaegerAPITraceURL: fmt.Sprintf("http://%s/select/jaeger/api/traces/%%s", stderrExtracts[1]), + jaegerAPIDependenciesURL: fmt.Sprintf("http://%s/select/jaeger/api/dependencies", stderrExtracts[1]), otlpTracesURL: fmt.Sprintf("http://%s/insert/opentelemetry/v1/traces", stderrExtracts[1]), }, nil @@ -143,7 +145,21 @@ func (app *Vtsingle) JaegerAPITrace(t *testing.T, traceID string, opts QueryOpts // JaegerAPIDependencies is a test helper function that queries for the dependencies. // This method is not implemented in Vtsingle and this test is no-op for now. -func (app *Vtsingle) JaegerAPIDependencies(_ *testing.T, _ QueryOpts) {} +func (app *Vtsingle) JaegerAPIDependencies(t *testing.T, param JaegerDependenciesParam, opts QueryOpts) *JaegerAPIDependenciesResponse { + t.Helper() + + paramsEnc := "?" + values := opts.asURLValues() + if len(values) > 0 { + paramsEnc += values.Encode() + "&" + } + uv := param.asURLValues() + if len(uv) > 0 { + paramsEnc += uv.Encode() + } + res, _ := app.cli.Get(t, app.jaegerAPIDependenciesURL+paramsEnc) + return NewJaegerAPIDependenciesResponse(t, res) +} // OTLPExportTraces is a test helper function that exports OTLP trace data // by sending an HTTP POST request to /insert/opentelemetry/v1/traces diff --git a/lib/protoparser/opentelemetry/pb/internal_fields.go b/lib/protoparser/opentelemetry/pb/internal_fields.go index 64edc4b8a..8a6616c78 100644 --- a/lib/protoparser/opentelemetry/pb/internal_fields.go +++ b/lib/protoparser/opentelemetry/pb/internal_fields.go @@ -16,5 +16,5 @@ const ( ServiceGraphStreamName = "trace_service_graph_stream" ServiceGraphParentFieldName = "parent" ServiceGraphChildFieldName = "child" - ServiceGraphCallCountFieldName = "call_count" + ServiceGraphCallCountFieldName = "callCount" ) From 8d20559eeca8b1a973429fb36793b07003193ee6 Mon 
Sep 17 00:00:00 2001 From: Jiekun Date: Sun, 28 Sep 2025 16:35:23 +0800 Subject: [PATCH 14/26] feature: [dependency] add another test case, where child calls parent as well, but the span kind was wrong, which wont be included in service graph result --- apptest/tests/service_graph_test.go | 100 ++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/apptest/tests/service_graph_test.go b/apptest/tests/service_graph_test.go index 5ff8dc27c..4e4d7f47d 100644 --- a/apptest/tests/service_graph_test.go +++ b/apptest/tests/service_graph_test.go @@ -186,6 +186,106 @@ func prepareTraceParentAndChildSpanData(tc *at.TestCase, sut at.VictoriaTracesWr // ingest data via /insert/opentelemetry/v1/traces sut.OTLPExportTraces(t, parentSpanReq, at.QueryOpts{}) sut.OTLPExportTraces(t, childSpanReq, at.QueryOpts{}) + + // case: 2 + // ingest invalid data via /insert/opentelemetry/v1/traces + // the invalid data attempt to generate `child service (calls) parent service` relation. + // but the span kind was set to incorrect (server calls client). + //So it should not generate a service graph edge in the result. + + // prepare test data for ingestion and assertion. + parentSpanID = "0987654321" + childSpanID = "09876543210" + + spanName = "testKeyIngestQuerySpan_invalid" + traceID = "0123456789" + + invalidParentSpanReq := &otelpb.ExportTraceServiceRequest{ + ResourceSpans: []*otelpb.ResourceSpans{ + { + Resource: otelpb.Resource{ + Attributes: []*otelpb.KeyValue{ + { + Key: "service.name", + Value: &otelpb.AnyValue{ + StringValue: &childServiceName, // attempt to generate `child calls parent`, so parent service should be `child`. 
+ }, + }, + }, + }, + ScopeSpans: []*otelpb.ScopeSpans{ + { + Scope: otelpb.InstrumentationScope{ + Name: "testInstrumentation", + Version: "1.0", + }, + Spans: []*otelpb.Span{ + { + TraceID: traceID, + SpanID: parentSpanID, + TraceState: "trace_state", + ParentSpanID: "", // root span + Flags: 1, + Name: spanName, + Kind: otelpb.SpanKind(2), // parent span set to 2 (server), which is invalid + StartTimeUnixNano: uint64(spanTime.UnixNano()), + EndTimeUnixNano: uint64(spanTime.UnixNano()), + Attributes: testTag, + Events: []*otelpb.SpanEvent{}, + Links: []*otelpb.SpanLink{}, + Status: otelpb.Status{}, + }, + }, + }, + }, + }, + }, + } + + invalidChildSpanReq := &otelpb.ExportTraceServiceRequest{ + ResourceSpans: []*otelpb.ResourceSpans{ + { + Resource: otelpb.Resource{ + Attributes: []*otelpb.KeyValue{ + { + Key: "service.name", + Value: &otelpb.AnyValue{ + StringValue: &parentServiceName, // attempt to generate `child calls parent`, so child service should be `parent`. + }, + }, + }, + }, + ScopeSpans: []*otelpb.ScopeSpans{ + { + Scope: otelpb.InstrumentationScope{ + Name: "testInstrumentation", + Version: "1.0", + }, + Spans: []*otelpb.Span{ + { + TraceID: traceID, + SpanID: childSpanID, + TraceState: "trace_state", + ParentSpanID: parentSpanID, + Flags: 1, + Name: spanName, + Kind: otelpb.SpanKind(3), // child span set to 3 (client), which is invalid + StartTimeUnixNano: uint64(spanTime.UnixNano()), + EndTimeUnixNano: uint64(spanTime.UnixNano()), + Attributes: testTag, + Events: []*otelpb.SpanEvent{}, + Links: []*otelpb.SpanLink{}, + Status: otelpb.Status{}, + }, + }, + }, + }, + }, + }, + } + + sut.OTLPExportTraces(t, invalidParentSpanReq, at.QueryOpts{}) + sut.OTLPExportTraces(t, invalidChildSpanReq, at.QueryOpts{}) return parentServiceName, childServiceName } From 793343d238f757b449398d79ff6bea70d20fe412 Mon Sep 17 00:00:00 2001 From: Jiekun Date: Sun, 28 Sep 2025 16:40:41 +0800 Subject: [PATCH 15/26] feature: [dependency] add comment for the test case --- 
apptest/tests/service_graph_test.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/apptest/tests/service_graph_test.go b/apptest/tests/service_graph_test.go index 4e4d7f47d..e5ae68ed4 100644 --- a/apptest/tests/service_graph_test.go +++ b/apptest/tests/service_graph_test.go @@ -34,6 +34,12 @@ func TestSingleServiceGraphGenerationJaegerQuery(t *testing.T) { func testServiceGraphGenerationJaegerQuery(tc *at.TestCase, sut at.VictoriaTracesWriteQuerier) { t := tc.T() + // prepareTraceParentAndChildSpanData generate 4 spans: + // 1. parentService span (with span kind=client) calls childService span (with span kind=server) + // 2. childService span (with span kind=server) calls parentService span (with span kind=client) + // + // Since `server` calls `client` is an invalid case, + // this 4 spans should generate only 1 relation edge: parentService->childService. parentServiceName, childServiceName := prepareTraceParentAndChildSpanData(tc, sut) // wait for service graph data to be generated From a4d13365b9da431bfb13e74847c4e591f3ddaef9 Mon Sep 17 00:00:00 2001 From: Jiekun Date: Sun, 28 Sep 2025 16:50:21 +0800 Subject: [PATCH 16/26] feature: [dependency] update doc for background task flag --- app/victoria-traces/backgroundtask/servicegraph.go | 8 ++++---- docs/victoriatraces/README.md | 12 ++++++++++-- docs/victoriatraces/querying/README.md | 3 +++ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/app/victoria-traces/backgroundtask/servicegraph.go b/app/victoria-traces/backgroundtask/servicegraph.go index 27a1f6c6f..5d7730ebf 100644 --- a/app/victoria-traces/backgroundtask/servicegraph.go +++ b/app/victoria-traces/backgroundtask/servicegraph.go @@ -16,10 +16,10 @@ import ( var ( enableServiceGraph = flag.Bool("servicegraph.enable", false, "Whether to enable background task for generating service graph. 
It should only be enabled on VictoriaTraces single-node or vtstorage.") - serviceGraphTaskInterval = flag.Duration("servicegraph.taskInterval", time.Minute, "The background task interval for generating service graph data. It requires setting `-servicegraph.enable=true`.") - serviceGraphTaskTimeout = flag.Duration("servicegraph.taskTimeout", 30*time.Second, "The background task timeout duration for generating service graph data. It requires setting `-servicegraph.enable=true`.") - serviceGraphTaskLookbehind = flag.Duration("servicegraph.taskLookbehind", time.Minute, "The lookbehind window for each time service graph background task run. It requires setting `-servicegraph.enable=true`.") - serviceGraphTaskLimit = flag.Uint64("servicegraph.taskLimit", 1000, "How many service graph relations each task could fetch for each tenant. It requires setting `-servicegraph.enable=true`.") + serviceGraphTaskInterval = flag.Duration("servicegraph.taskInterval", time.Minute, "The background task interval for generating service graph data. It requires setting -servicegraph.enable=true.") + serviceGraphTaskTimeout = flag.Duration("servicegraph.taskTimeout", 30*time.Second, "The background task timeout duration for generating service graph data. It requires setting -servicegraph.enable=true.") + serviceGraphTaskLookbehind = flag.Duration("servicegraph.taskLookbehind", time.Minute, "The lookbehind window for each time service graph background task run. It requires setting -servicegraph.enable=true.") + serviceGraphTaskLimit = flag.Uint64("servicegraph.taskLimit", 1000, "How many service graph relations each task could fetch for each tenant. 
It requires setting -servicegraph.enable=true.") ) var ( diff --git a/docs/victoriatraces/README.md b/docs/victoriatraces/README.md index a8814813e..d5fb08b98 100644 --- a/docs/victoriatraces/README.md +++ b/docs/victoriatraces/README.md @@ -513,8 +513,6 @@ It is recommended protecting internal HTTP endpoints from unauthorized access: The maximum number of service name can return in a get service name request. This limit affects Jaeger's /api/services API. (default 1000) -search.traceMaxSpanNameList uint The maximum number of span name can return in a get span name request. This limit affects Jaeger's /api/services/*/operations API. (default 1000) - -search.traceMaxDependencyList uint - The maximum number of dependency links can return in a get dependencies request. This limit affects Jaeger's /api/dependencies API. Not limited by default. (default 0) -search.traceSearchStep duration Splits the [0, now] time range into many small time ranges by -search.traceSearchStep when searching for spans by trace_id. Once it finds spans in a time range, it performs an additional search according to -search.traceMaxDurationWindow and then stops. It affects Jaeger's /api/traces/ API. (default 24h0m0s) -search.traceServiceAndSpanNameLookbehind duration @@ -523,6 +521,16 @@ It is recommended protecting internal HTTP endpoints from unauthorized access: Whether to disable /select/* HTTP endpoints -select.disableCompression Whether to disable compression for select query responses received from -storageNode nodes. Disabled compression reduces CPU usage at the cost of higher network usage + -servicegraph.enable + Whether to enable background task for generating service graph. It should only be enabled on VictoriaTraces single-node or vtstorage. + -servicegraph.taskInterval duration + The background task interval for generating service graph data. It requires setting -servicegraph.enable=true. 
(default 1m0s) + -servicegraph.taskLimit uint + How many service graph relations each task could fetch for each tenant. It requires setting -servicegraph.enable=true. (default 1000) + -servicegraph.taskLookbehind duration + The lookbehind window for each time service graph background task run. It requires setting -servicegraph.enable=true. (default 1m0s) + -servicegraph.taskTimeout duration + The background task timeout duration for generating service graph data. It requires setting -servicegraph.enable=true. (default 30s) -storage.minFreeDiskSpaceBytes size The minimum free disk space at -storageDataPath after which the storage stops accepting new data Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 10000000) diff --git a/docs/victoriatraces/querying/README.md b/docs/victoriatraces/querying/README.md index 4e8ffbc4d..8ad6034ec 100644 --- a/docs/victoriatraces/querying/README.md +++ b/docs/victoriatraces/querying/README.md @@ -132,6 +132,9 @@ Here's a response example: #### Querying dependencies +> To enable dependencies visualization, you **must** set `-servicegraph.enable` to `true` on VictoriaTraces single-node or +> vtstorage to run the background task, which generates service graph data periodically. See also: `-servicegraph.*` flags. + The dependency graph is available at the `/select/jaeger/api/dependencies` HTTP endpoint. 
This endpoint provides the following params: From 2eaf89251452a9311d51c4a61fbcd64479245ca8 Mon Sep 17 00:00:00 2001 From: Jiekun Date: Sun, 28 Sep 2025 16:52:53 +0800 Subject: [PATCH 17/26] feature: [dependency] update doc for dependencies API --- docs/victoriatraces/querying/README.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/victoriatraces/querying/README.md b/docs/victoriatraces/querying/README.md index 8ad6034ec..63d5380a5 100644 --- a/docs/victoriatraces/querying/README.md +++ b/docs/victoriatraces/querying/README.md @@ -139,10 +139,7 @@ The dependency graph is available at the `/select/jaeger/api/dependencies` HTTP This endpoint provides the following params: - `endTs`: the end timestamp in unix milliseconds. Current timestamp will be used if empty. -- `lookback`: the lookbehind window duration in milliseconds. Default to `1m` if empty (not controlled by `-search.traceMaxDependencyLookbehind` flag). - -By default, the `lookback` duration cannot exceed `-search.traceMaxDependencyLookbehind` flag (default `1m`), which help preventing -heavy queries for dependency analysis. +- `lookback`: the lookbehind window duration in milliseconds. Default to `1m` if empty. Here are examples of the dependency API: From bf8b386f884b568f3f468362412ebb4a2de64f02 Mon Sep 17 00:00:00 2001 From: Jiekun Date: Sun, 5 Oct 2025 10:29:55 +0800 Subject: [PATCH 18/26] chore: apply review suggestions. increased default lookbehind for service graph to 1h. renamed some flags and wording. 
--- app/victoria-traces/main.go | 8 +++++--- .../servicegraph.go | 16 ++++++++-------- app/vtselect/traces/jaeger/jaeger.go | 2 +- apptest/tests/service_graph_test.go | 2 +- docs/victoriatraces/README.md | 10 +++++----- 5 files changed, 20 insertions(+), 18 deletions(-) rename app/victoria-traces/{backgroundtask => servicegraph}/servicegraph.go (87%) diff --git a/app/victoria-traces/main.go b/app/victoria-traces/main.go index 54a890ed9..9a0aae72e 100644 --- a/app/victoria-traces/main.go +++ b/app/victoria-traces/main.go @@ -15,7 +15,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics" - "github.com/VictoriaMetrics/VictoriaTraces/app/victoria-traces/backgroundtask" + "github.com/VictoriaMetrics/VictoriaTraces/app/victoria-traces/servicegraph" "github.com/VictoriaMetrics/VictoriaTraces/app/vtinsert" "github.com/VictoriaMetrics/VictoriaTraces/app/vtinsert/insertutil" "github.com/VictoriaMetrics/VictoriaTraces/app/vtselect" @@ -49,7 +49,9 @@ func main() { insertutil.SetLogRowsStorage(&vtstorage.Storage{}) vtinsert.Init() - backgroundtask.Init() + + // optional background task(s) + servicegraph.Init() go httpserver.Serve(listenAddrs, requestHandler, httpserver.ServeOptions{ UseProxyProtocol: useProxyProtocol, @@ -68,7 +70,7 @@ func main() { } logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds()) - backgroundtask.Stop() + servicegraph.Stop() vtinsert.Stop() vtselect.Stop() vtstorage.Stop() diff --git a/app/victoria-traces/backgroundtask/servicegraph.go b/app/victoria-traces/servicegraph/servicegraph.go similarity index 87% rename from app/victoria-traces/backgroundtask/servicegraph.go rename to app/victoria-traces/servicegraph/servicegraph.go index 5d7730ebf..de2c0eadb 100644 --- a/app/victoria-traces/backgroundtask/servicegraph.go +++ b/app/victoria-traces/servicegraph/servicegraph.go @@ -1,4 +1,4 @@ -package backgroundtask +package 
servicegraph import ( "context" @@ -15,11 +15,11 @@ import ( ) var ( - enableServiceGraph = flag.Bool("servicegraph.enable", false, "Whether to enable background task for generating service graph. It should only be enabled on VictoriaTraces single-node or vtstorage.") - serviceGraphTaskInterval = flag.Duration("servicegraph.taskInterval", time.Minute, "The background task interval for generating service graph data. It requires setting -servicegraph.enable=true.") - serviceGraphTaskTimeout = flag.Duration("servicegraph.taskTimeout", 30*time.Second, "The background task timeout duration for generating service graph data. It requires setting -servicegraph.enable=true.") - serviceGraphTaskLookbehind = flag.Duration("servicegraph.taskLookbehind", time.Minute, "The lookbehind window for each time service graph background task run. It requires setting -servicegraph.enable=true.") - serviceGraphTaskLimit = flag.Uint64("servicegraph.taskLimit", 1000, "How many service graph relations each task could fetch for each tenant. It requires setting -servicegraph.enable=true.") + enableServiceGraphTask = flag.Bool("servicegraph.enableTask", false, "Whether to enable background task for generating service graph. It should only be enabled on VictoriaTraces single-node or vtstorage.") + serviceGraphTaskInterval = flag.Duration("servicegraph.taskInterval", time.Minute, "The background task interval for generating service graph data. It requires setting -servicegraph.enableTask=true.") + serviceGraphTaskTimeout = flag.Duration("servicegraph.taskTimeout", 30*time.Second, "The background task timeout duration for generating service graph data. It requires setting -servicegraph.enableTask=true.") + serviceGraphTaskLookbehind = flag.Duration("servicegraph.taskLookbehind", time.Minute, "The lookbehind window for each time service graph background task run. 
It requires setting -servicegraph.enableTask=true.") + serviceGraphTaskLimit = flag.Uint64("servicegraph.taskLimit", 1000, "How many service graph relations each task could fetch for each tenant. It requires setting -servicegraph.enableTask=true.") ) var ( @@ -27,14 +27,14 @@ var ( ) func Init() { - if *enableServiceGraph { + if *enableServiceGraphTask { sgt = newServiceGraphTask() sgt.Start() } } func Stop() { - if *enableServiceGraph { + if *enableServiceGraphTask { sgt.Stop() } } diff --git a/app/vtselect/traces/jaeger/jaeger.go b/app/vtselect/traces/jaeger/jaeger.go index 997dcd7a4..a117ab2fc 100644 --- a/app/vtselect/traces/jaeger/jaeger.go +++ b/app/vtselect/traces/jaeger/jaeger.go @@ -462,7 +462,7 @@ func parseJaegerDependenciesQueryParam(_ context.Context, r *http.Request) (*que // default params p := &query.ServiceGraphQueryParameters{ EndTs: time.Now(), - Lookback: time.Minute * 1, + Lookback: time.Hour, } q := r.URL.Query() diff --git a/apptest/tests/service_graph_test.go b/apptest/tests/service_graph_test.go index e5ae68ed4..0cd725cbb 100644 --- a/apptest/tests/service_graph_test.go +++ b/apptest/tests/service_graph_test.go @@ -24,7 +24,7 @@ func TestSingleServiceGraphGenerationJaegerQuery(t *testing.T) { sut := tc.MustStartVtsingle("vtsingle", []string{ "-storageDataPath=" + tc.Dir() + "/vtsingle", "-retentionPeriod=100y", - "-servicegraph.enable=true", + "-servicegraph.enableTask=true", "-servicegraph.taskInterval=1s", }) diff --git a/docs/victoriatraces/README.md b/docs/victoriatraces/README.md index d5fb08b98..1c33a3fde 100644 --- a/docs/victoriatraces/README.md +++ b/docs/victoriatraces/README.md @@ -521,16 +521,16 @@ It is recommended protecting internal HTTP endpoints from unauthorized access: Whether to disable /select/* HTTP endpoints -select.disableCompression Whether to disable compression for select query responses received from -storageNode nodes. 
Disabled compression reduces CPU usage at the cost of higher network usage - -servicegraph.enable + -servicegraph.enableTask Whether to enable background task for generating service graph. It should only be enabled on VictoriaTraces single-node or vtstorage. -servicegraph.taskInterval duration - The background task interval for generating service graph data. It requires setting -servicegraph.enable=true. (default 1m0s) + The background task interval for generating service graph data. It requires setting -servicegraph.enableTask=true. (default 1m0s) -servicegraph.taskLimit uint - How many service graph relations each task could fetch for each tenant. It requires setting -servicegraph.enable=true. (default 1000) + How many service graph relations each task could fetch for each tenant. It requires setting -servicegraph.enableTask=true. (default 1000) -servicegraph.taskLookbehind duration - The lookbehind window for each time service graph background task run. It requires setting -servicegraph.enable=true. (default 1m0s) + The lookbehind window for each time service graph background task run. It requires setting -servicegraph.enableTask=true. (default 1m0s) -servicegraph.taskTimeout duration - The background task timeout duration for generating service graph data. It requires setting -servicegraph.enable=true. (default 30s) + The background task timeout duration for generating service graph data. It requires setting -servicegraph.enableTask=true. 
(default 30s) -storage.minFreeDiskSpaceBytes size The minimum free disk space at -storageDataPath after which the storage stops accepting new data Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 10000000) From 757174ec58ecd4132b36661c5e8392e5c493406a Mon Sep 17 00:00:00 2001 From: Jiekun Date: Sun, 5 Oct 2025 10:51:37 +0800 Subject: [PATCH 19/26] feature: [dependency] update dependencies of logstorage --- go.mod | 4 ++-- go.sum | 16 ++++------------ .../logstorage/filter_contains_common_case.go | 17 +++++++++++++---- .../lib/logstorage/storage_search.go | 14 +++++++------- .../github.com/VictoriaMetrics/metrics/set.go | 4 ++-- .../VictoriaMetrics/metrics/summary.go | 14 ++++++++------ vendor/modules.txt | 4 ++-- 7 files changed, 38 insertions(+), 35 deletions(-) diff --git a/go.mod b/go.mod index 4b7653a36..b903af8a2 100644 --- a/go.mod +++ b/go.mod @@ -3,11 +3,11 @@ module github.com/VictoriaMetrics/VictoriaTraces go 1.25.1 require ( - github.com/VictoriaMetrics/VictoriaLogs v1.35.1-0.20250927125409-7c4c8381c2d8 + github.com/VictoriaMetrics/VictoriaLogs v1.35.1-0.20251002105307-b134e33daa38 github.com/VictoriaMetrics/VictoriaMetrics v0.0.0-20250917082640-2c72ef0f3871 github.com/VictoriaMetrics/easyproto v0.1.4 github.com/VictoriaMetrics/fastcache v1.13.0 - github.com/VictoriaMetrics/metrics v1.40.1 + github.com/VictoriaMetrics/metrics v1.40.2 github.com/cespare/xxhash/v2 v2.3.0 github.com/google/go-cmp v0.7.0 github.com/valyala/fastjson v1.6.4 diff --git a/go.sum b/go.sum index b6907c621..2ccad504a 100644 --- a/go.sum +++ b/go.sum @@ -1,21 +1,13 @@ -github.com/VictoriaMetrics/VictoriaLogs v1.33.1 h1:Oi0Jb+AqCuhVf1E6VTNV2/lRnTcQMERmsVzF4BvjV/A= -github.com/VictoriaMetrics/VictoriaLogs v1.33.1/go.mod h1:Mbyj/sNaXlbms/05TiWK0Yr6bU7WkQHusBhws2OcWbI= -github.com/VictoriaMetrics/VictoriaLogs v1.34.1-0.20250926053343-ba4afbbcde47 h1:JW2dgJ2voOk5Y6mCDNe8OAl3P8Y+24f2GBeh2MGGNeE= -github.com/VictoriaMetrics/VictoriaLogs 
v1.34.1-0.20250926053343-ba4afbbcde47/go.mod h1:c56BHhaonceOp5ddzKDPFNQmoG/oBE58/JXek+v714o= -github.com/VictoriaMetrics/VictoriaLogs v1.35.1-0.20250927125409-7c4c8381c2d8 h1:4Q5+H/7SN9r1Bvdr6H9zwjFp93mlLyaDPXdB1dy+iCs= -github.com/VictoriaMetrics/VictoriaLogs v1.35.1-0.20250927125409-7c4c8381c2d8/go.mod h1:c56BHhaonceOp5ddzKDPFNQmoG/oBE58/JXek+v714o= -github.com/VictoriaMetrics/VictoriaMetrics v0.0.0-20250903201027-a0a33f0ce1c2 h1:XKK5/XxMw1HdlKic12Wh9rJRKxxpy0MTHBdWi+80MAs= -github.com/VictoriaMetrics/VictoriaMetrics v0.0.0-20250903201027-a0a33f0ce1c2/go.mod h1:mRRUzTxEuernAU9h20/UeZs11dJA2J8LaP9DxbtCag0= +github.com/VictoriaMetrics/VictoriaLogs v1.35.1-0.20251002105307-b134e33daa38 h1:oZ+2FnaN5zyK2W12I3VnGmv2svrO3BWAFb4JSLFBaM8= +github.com/VictoriaMetrics/VictoriaLogs v1.35.1-0.20251002105307-b134e33daa38/go.mod h1:olGXQyE3qNkndn0lgJB1J3cGpotNbqLdb9ln3jEkznE= github.com/VictoriaMetrics/VictoriaMetrics v0.0.0-20250917082640-2c72ef0f3871 h1:5G3BS+OSqN6Lie30l+VoNbV8Ks0irHGzvrPmIpijG+A= github.com/VictoriaMetrics/VictoriaMetrics v0.0.0-20250917082640-2c72ef0f3871/go.mod h1:Lrd9cpKD2edJ+uoXTGEcHMZwnI+vyomJrINld69Z8h4= github.com/VictoriaMetrics/easyproto v0.1.4 h1:r8cNvo8o6sR4QShBXQd1bKw/VVLSQma/V2KhTBPf+Sc= github.com/VictoriaMetrics/easyproto v0.1.4/go.mod h1:QlGlzaJnDfFd8Lk6Ci/fuLxfTo3/GThPs2KH23mv710= github.com/VictoriaMetrics/fastcache v1.13.0 h1:AW4mheMR5Vd9FkAPUv+NH6Nhw+fmbTMGMsNAoA/+4G0= github.com/VictoriaMetrics/fastcache v1.13.0/go.mod h1:hHXhl4DA2fTL2HTZDJFXWgW0LNjo6B+4aj2Wmng3TjU= -github.com/VictoriaMetrics/metrics v1.40.1 h1:FrF5uJRpIVj9fayWcn8xgiI+FYsKGMslzPuOXjdeyR4= -github.com/VictoriaMetrics/metrics v1.40.1/go.mod h1:XE4uudAAIRaJE614Tl5HMrtoEU6+GDZO4QTnNSsZRuA= -github.com/VictoriaMetrics/metricsql v0.84.7 h1:zMONjtEULMbwEYU/qL4Hkc3GDfTTrv1bO+a9lmJf3do= -github.com/VictoriaMetrics/metricsql v0.84.7/go.mod h1:d4EisFO6ONP/HIGDYTAtwrejJBBeKGQYiRl095bS4QQ= +github.com/VictoriaMetrics/metrics v1.40.2 h1:OVSjKcQEx6JAwGeu8/KQm9Su5qJ72TMEW4xYn5vw3Ac= 
+github.com/VictoriaMetrics/metrics v1.40.2/go.mod h1:XE4uudAAIRaJE614Tl5HMrtoEU6+GDZO4QTnNSsZRuA= github.com/VictoriaMetrics/metricsql v0.84.8 h1:5JXrvPJiYkYNqJVT7+hMZmpAwRHd3txBdlVIw4rJ1VM= github.com/VictoriaMetrics/metricsql v0.84.8/go.mod h1:d4EisFO6ONP/HIGDYTAtwrejJBBeKGQYiRl095bS4QQ= github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156 h1:eMwmnE/GDgah4HI848JfFxHt+iPb26b4zyfspmqY0/8= diff --git a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/filter_contains_common_case.go b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/filter_contains_common_case.go index 3eaa6b35c..01424d831 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/filter_contains_common_case.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/filter_contains_common_case.go @@ -62,11 +62,10 @@ func getCommonCasePhrases(phrases []string) ([]string, error) { var dst []string for _, phrase := range phrases { upper := countUpperRunes(phrase) - if upper > 10 { + if upper > 6 { return nil, fmt.Errorf("too many common_case combinations for the %q; reduce the number of uppercase letters here", phrase) } dst = appendCommonCasePhrases(dst, "", phrase) - dst = append(dst, strings.ToUpper(phrase)) } // Deduplicate dst @@ -96,6 +95,7 @@ func countUpperRunes(s string) int { func appendCommonCasePhrases(dst []string, prefix, phrase string) []string { dst = append(dst, prefix+phrase) + dst = append(dst, strings.ToUpper(prefix+phrase)) for off, c := range phrase { if !unicode.IsUpper(c) { @@ -106,9 +106,18 @@ func appendCommonCasePhrases(dst []string, prefix, phrase string) []string { continue } + cLower := unicode.ToLower(c) + + prefixLocal := prefix + phrase[:off] phraseTail := phrase[off+charLen:] - dst = appendCommonCasePhrases(dst, prefix+phrase[:off]+string(unicode.ToLower(c)), phraseTail) - dst = appendCommonCasePhrases(dst, prefix+phrase[:off+charLen], phraseTail) + + dst = appendCommonCasePhrases(dst, 
prefixLocal+string(cLower), phraseTail) + dst = appendCommonCasePhrases(dst, prefixLocal+string(c), phraseTail) + if prefixLocal != "" { + dst = appendCommonCasePhrases(dst, prefixLocal+" "+string(cLower), phraseTail) + dst = appendCommonCasePhrases(dst, prefixLocal+" "+string(c), phraseTail) + } } + return dst } diff --git a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/storage_search.go b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/storage_search.go index 24027d679..4d6e38ed9 100644 --- a/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/storage_search.go +++ b/vendor/github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage/storage_search.go @@ -1224,12 +1224,13 @@ func (db *DataBlock) initFromBlockResult(br *blockResult) { // // It uses workersCount parallel workers for the search and calls writeBlock for each matching block. func (s *Storage) searchParallel(workersCount int, so *genericSearchOptions, qs *QueryStats, stopCh <-chan struct{}, writeBlock writeBlockResultFunc) { - // Spin up workers - var wgWorkers sync.WaitGroup + // spin up workers + var wg sync.WaitGroup workCh := make(chan *blockSearchWorkBatch, workersCount) - wgWorkers.Add(workersCount) - for i := 0; i < workersCount; i++ { + for workerID := 0; workerID < workersCount; workerID++ { + wg.Add(1) go func(workerID uint) { + defer wg.Done() qsLocal := &QueryStats{} bs := getBlockSearch() bm := getBitmap(0) @@ -1264,8 +1265,7 @@ func (s *Storage) searchParallel(workersCount int, so *genericSearchOptions, qs putBlockSearch(bs) putBitmap(bm) qs.UpdateAtomic(qsLocal) - wgWorkers.Done() - }(uint(i)) + }(uint(workerID)) } // Select partitions according to the selected time range @@ -1314,7 +1314,7 @@ func (s *Storage) searchParallel(workersCount int, so *genericSearchOptions, qs // Wait until workers finish their work close(workCh) - wgWorkers.Wait() + wg.Wait() // Finalize partition search for _, psf := range psfs { diff --git 
a/vendor/github.com/VictoriaMetrics/metrics/set.go b/vendor/github.com/VictoriaMetrics/metrics/set.go index ffe7b9fca..b8b81b92c 100644 --- a/vendor/github.com/VictoriaMetrics/metrics/set.go +++ b/vendor/github.com/VictoriaMetrics/metrics/set.go @@ -47,10 +47,10 @@ func (s *Set) WritePrometheus(w io.Writer) { return fName1 < fName2 } + // Only summary and quantile(s) have different metric types. + // Sorting by metric type will stabilize the order for summary and quantile(s). mType1 := s.a[i].metric.metricType() mType2 := s.a[j].metric.metricType() - - // stabilize the order for summary and quantiles. if mType1 != mType2 { return mType1 < mType2 } diff --git a/vendor/github.com/VictoriaMetrics/metrics/summary.go b/vendor/github.com/VictoriaMetrics/metrics/summary.go index 1b62988bb..51bccdf98 100644 --- a/vendor/github.com/VictoriaMetrics/metrics/summary.go +++ b/vendor/github.com/VictoriaMetrics/metrics/summary.go @@ -120,7 +120,13 @@ func (sm *Summary) marshalTo(prefix string, w io.Writer) { } func (sm *Summary) metricType() string { - return "summary" + // this metric type should not be printed, because summary (sum and count) + // of the same metric family will be printed after quantile(s). + // If metadata is needed, the metadata from quantile(s) should be used. + // quantile will be printed first, so its metrics type won't be printed as metadata. + // Printing quantiles before sum and count aligns this code with Prometheus behavior. + // See: https://github.com/VictoriaMetrics/metrics/pull/99 + return "unsupported" } func splitMetricName(name string) (string, string) { @@ -201,11 +207,7 @@ func (qv *quantileValue) marshalTo(prefix string, w io.Writer) { } func (qv *quantileValue) metricType() string { - // this metricsType should not be printed, because summary (sum and count) of the same metric family will be printed first, - // and if metadata is needed, the metadata from summary should be used. 
- // quantile will be printed later, so its metrics type won't be printed as metadata. - // See: https://github.com/VictoriaMetrics/metrics/pull/99 - return "unsupported" + return "summary" } func addTag(name, tag string) string { diff --git a/vendor/modules.txt b/vendor/modules.txt index 7c00b4f0d..3103d13d7 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,4 +1,4 @@ -# github.com/VictoriaMetrics/VictoriaLogs v1.35.1-0.20250927125409-7c4c8381c2d8 +# github.com/VictoriaMetrics/VictoriaLogs v1.35.1-0.20251002105307-b134e33daa38 ## explicit; go 1.25.1 github.com/VictoriaMetrics/VictoriaLogs/lib/logstorage github.com/VictoriaMetrics/VictoriaLogs/lib/prefixfilter @@ -49,7 +49,7 @@ github.com/VictoriaMetrics/easyproto # github.com/VictoriaMetrics/fastcache v1.13.0 ## explicit; go 1.24.0 github.com/VictoriaMetrics/fastcache -# github.com/VictoriaMetrics/metrics v1.40.1 +# github.com/VictoriaMetrics/metrics v1.40.2 ## explicit; go 1.18 github.com/VictoriaMetrics/metrics # github.com/VictoriaMetrics/metricsql v0.84.8 From 29cce0fb7b086ea4d4885911a4aed24c9a7fe61f Mon Sep 17 00:00:00 2001 From: Jiekun Date: Sun, 5 Oct 2025 10:58:59 +0800 Subject: [PATCH 20/26] feature: [dependency] update docs --- docs/victoriatraces/querying/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/victoriatraces/querying/README.md b/docs/victoriatraces/querying/README.md index 63d5380a5..4f5c310cc 100644 --- a/docs/victoriatraces/querying/README.md +++ b/docs/victoriatraces/querying/README.md @@ -132,14 +132,14 @@ Here's a response example: #### Querying dependencies -> To enable dependencies visualization, you **must** set `-servicegraph.enable` to `true` on VictoriaTraces single-node or +> To enable dependencies visualization, you **must** set `-servicegraph.enableTask` to `true` on VictoriaTraces single-node or > vtstorage to run the background task, which generates service graph data periodically. See also: `-servicegraph.*` flags. 
The dependency graph is available at the `/select/jaeger/api/dependencies` HTTP endpoint. This endpoint provides the following params: - `endTs`: the end timestamp in unix milliseconds. Current timestamp will be used if empty. -- `lookback`: the lookbehind window duration in milliseconds. Default to `1m` if empty. +- `lookback`: the lookbehind window duration in milliseconds. Default to `1h` if empty. Here are examples of the dependency API: From d1f55b040a303b8173fb9957ad89775bd507aee7 Mon Sep 17 00:00:00 2001 From: Jiekun Date: Sun, 5 Oct 2025 11:08:30 +0800 Subject: [PATCH 21/26] feature: [dependency] fix GetMetric according to https://github.com/VictoriaMetrics/VictoriaLogs/commit/52c5f8a0e24cbae35e2131123b597e9777da82c5 --- apptest/client.go | 20 +++++++++++++++----- apptest/tests/service_graph_test.go | 7 ++++++- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/apptest/client.go b/apptest/client.go index 14099d4ed..cbf1e8279 100644 --- a/apptest/client.go +++ b/apptest/client.go @@ -2,6 +2,7 @@ package apptest import ( "bytes" + "fmt" "io" "net" "net/http" @@ -134,9 +135,19 @@ func (app *ServesMetrics) GetIntMetric(t *testing.T, metricName string) int { func (app *ServesMetrics) GetMetric(t *testing.T, metricName string) float64 { t.Helper() + value, err := app.TryGetMetric(t, metricName) + if err != nil { + t.Fatalf("get metric error: %s, %v", metricName, err) + } + return value +} + +func (app *ServesMetrics) TryGetMetric(t *testing.T, metricName string) (float64, error) { + t.Helper() + metrics, statusCode := app.cli.Get(t, app.metricsURL) if statusCode != http.StatusOK { - t.Fatalf("unexpected status code: got %d, want %d", statusCode, http.StatusOK) + return 0, fmt.Errorf("unexpected status code: got %d, want %d", statusCode, http.StatusOK) } for _, metric := range strings.Split(metrics, "\n") { value, found := strings.CutPrefix(metric, metricName) @@ -144,13 +155,12 @@ func (app *ServesMetrics) GetMetric(t *testing.T, metricName string) 
float64 { value = strings.Trim(value, " ") res, err := strconv.ParseFloat(value, 64) if err != nil { - t.Fatalf("could not parse metric value %s: %v", metric, err) + return 0, fmt.Errorf("could not parse metric value %s: %v", metric, err) } - return res + return res, nil } } - t.Logf("metric not found: %s", metricName) - return 0 + return 0, fmt.Errorf("metric not found: %s", metricName) } // GetMetricsByPrefix retrieves the values of all metrics that start with given diff --git a/apptest/tests/service_graph_test.go b/apptest/tests/service_graph_test.go index 0cd725cbb..501dd4bd1 100644 --- a/apptest/tests/service_graph_test.go +++ b/apptest/tests/service_graph_test.go @@ -301,7 +301,12 @@ func getServiceGraphRowsInsertedTotal(t *testing.T, sut at.VictoriaTracesWriteQu selector := `vt_rows_ingested_total{type="internalinsert_servicegraph"}` switch tt := sut.(type) { case *at.Vtsingle: - return tt.GetIntMetric(t, selector) + // use TryGetMetric instead of TryMetric, to allow retries. + value, err := tt.TryGetMetric(t, selector) + if err != nil { + t.Logf("try get service graph rows failed: %v", err) + } + return int(value) default: t.Fatalf("unexpected type: got %T, want *Vtsingle", sut) } From d3f69e5d17aaf161df987eb8ed86074d167e911e Mon Sep 17 00:00:00 2001 From: Jiekun Date: Sun, 5 Oct 2025 11:13:07 +0800 Subject: [PATCH 22/26] feature: [dependency] claim that service graph is experimental --- docs/victoriatraces/changelog/CHANGELOG.md | 2 +- docs/victoriatraces/querying/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/victoriatraces/changelog/CHANGELOG.md b/docs/victoriatraces/changelog/CHANGELOG.md index c357ac69b..c28e6be7e 100644 --- a/docs/victoriatraces/changelog/CHANGELOG.md +++ b/docs/victoriatraces/changelog/CHANGELOG.md @@ -19,7 +19,7 @@ The following `tip` changes can be tested by building VictoriaTraces components * BUGFIX: all components: restore sorting order of summary and quantile metrics exposed by 
VictoriaTraces components on `/metrics` page. See [metrics#105](https://github.com/VictoriaMetrics/metrics/pull/105) for details. -* FEATURE: [Single-node VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) and vtselect in [VictoriaTraces cluster](https://docs.victoriametrics.com/victoriatraces/cluster/): support Jaeger [service dependencies graph API](https://www.jaegertracing.io/docs/2.10/architecture/apis/#service-dependencies-graph). See [this pull request](https://github.com/VictoriaMetrics/VictoriaTraces/pull/52) for details. +* FEATURE: [Single-node VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) and vtselect in [VictoriaTraces cluster](https://docs.victoriametrics.com/victoriatraces/cluster/): (experimental) support Jaeger [service dependencies graph API](https://www.jaegertracing.io/docs/2.10/architecture/apis/#service-dependencies-graph). See [this pull request](https://github.com/VictoriaMetrics/VictoriaTraces/pull/52) for details. ## [v0.3.0](https://github.com/VictoriaMetrics/VictoriaTraces/releases/tag/v0.3.0) diff --git a/docs/victoriatraces/querying/README.md b/docs/victoriatraces/querying/README.md index 4f5c310cc..16cc29244 100644 --- a/docs/victoriatraces/querying/README.md +++ b/docs/victoriatraces/querying/README.md @@ -132,7 +132,7 @@ Here's a response example: #### Querying dependencies -> To enable dependencies visualization, you **must** set `-servicegraph.enableTask` to `true` on VictoriaTraces single-node or +> This feature is **experimental**. To enable dependencies visualization, you **must** set `-servicegraph.enableTask` to `true` on VictoriaTraces single-node or > vtstorage to run the background task, which generates service graph data periodically. See also: `-servicegraph.*` flags. The dependency graph is available at the `/select/jaeger/api/dependencies` HTTP endpoint. 
From 2208492bb4de96892e5983483d53952c91812aa7 Mon Sep 17 00:00:00 2001 From: Jiekun Date: Tue, 7 Oct 2025 16:32:38 +0800 Subject: [PATCH 23/26] feature: [dependency] apply review suggestions --- app/victoria-traces/servicegraph/servicegraph.go | 16 ++++++---------- app/vtgen/main.go | 13 +++++++++++-- app/vtselect/traces/query/query.go | 15 +++++++-------- docs/victoriatraces/querying/README.md | 5 +++-- 4 files changed, 27 insertions(+), 22 deletions(-) diff --git a/app/victoria-traces/servicegraph/servicegraph.go b/app/victoria-traces/servicegraph/servicegraph.go index de2c0eadb..7da6df1f6 100644 --- a/app/victoria-traces/servicegraph/servicegraph.go +++ b/app/victoria-traces/servicegraph/servicegraph.go @@ -50,9 +50,11 @@ func newServiceGraphTask() *serviceGraphTask { } func (sgt *serviceGraphTask) Start() { - logger.Infof("starting background task for service graph, interval: %v, lookbehind: %v", *serviceGraphTaskInterval, *serviceGraphTaskLookbehind) - ticker := time.NewTicker(*serviceGraphTaskInterval) + logger.Infof("starting servicegraph background task, interval: %v, lookbehind: %v", *serviceGraphTaskInterval, *serviceGraphTaskLookbehind) go func() { + ticker := time.NewTicker(*serviceGraphTaskInterval) + defer ticker.Stop() + for { select { case <-sgt.stopCh: @@ -82,14 +84,8 @@ func GenerateServiceGraphTimeRange(ctx context.Context) { // query and persist operations are executed sequentially, which helps not to consume excessive resources. for _, tenantID := range tenantIDs { - // Build a fake HTTP *Request. It helps align the way of handling tenant-related input - // in both vtselect and vtinsert, as they assume tenant info will exist in HTTP headers. 
- r, _ := http.NewRequestWithContext(ctx, "", "", nil) - r.Header.Set("AccountID", strconv.FormatUint(uint64(tenantID.AccountID), 10)) - r.Header.Set("ProjectID", strconv.FormatUint(uint64(tenantID.ProjectID), 10)) - // query service graph relations - rows, err := vtselect.GetServiceGraphTimeRange(ctx, r, startTime, endTime, *serviceGraphTaskLimit) + rows, err := vtselect.GetServiceGraphTimeRange(ctx, tenantID, startTime, endTime, *serviceGraphTaskLimit) if err != nil { logger.Errorf("cannot get service graph for time range [%d, %d]: %s", startTime.Unix(), endTime.Unix(), err) return @@ -101,7 +97,7 @@ func GenerateServiceGraphTimeRange(ctx context.Context) { // persist service graph relations err = vtinsert.PersistServiceGraph(ctx, r, rows, endTime) if err != nil { - logger.Errorf("cannot presist service graph for time %d: %s", endTime.Unix(), err) + logger.Errorf("cannot presist service graph for time range [%d, %d]: %s", startTime.Unix(), endTime.Unix(), err) } } } diff --git a/app/vtgen/main.go b/app/vtgen/main.go index b0ddcf85e..c0dbea0dd 100644 --- a/app/vtgen/main.go +++ b/app/vtgen/main.go @@ -116,7 +116,7 @@ func main() { } } - // replace SpanID and parentSpanID + // replace SpanID if sid, ok := spanIDMap[sp.SpanID]; ok { sp.SpanID = sid } else { @@ -126,7 +126,7 @@ func main() { spanIDMap[oldSpanID] = spanID } - // replace SpanID and parentSpanID + // replace parentSpanID if sid, ok := spanIDMap[sp.ParentSpanID]; ok { sp.ParentSpanID = sid } else { @@ -219,13 +219,22 @@ func loadTestData() [][]byte { return bodyList } +var traceIDMutex sync.Mutex + func generateTraceID() string { + traceIDMutex.Lock() + defer traceIDMutex.Unlock() + h := md5.New() h.Write([]byte(strconv.FormatInt(time.Now().UnixNano(), 10))) return hex.EncodeToString(h.Sum(nil)) } +var spanIDMutex sync.Mutex + func generateSpanID() string { + spanIDMutex.Lock() + defer spanIDMutex.Unlock() h := md5.New() h.Write([]byte(strconv.FormatInt(time.Now().UnixNano(), 10))) return 
hex.EncodeToString(h.Sum(nil))[:16] diff --git a/app/vtselect/traces/query/query.go b/app/vtselect/traces/query/query.go index 4b6d6000e..f766e381a 100644 --- a/app/vtselect/traces/query/query.go +++ b/app/vtselect/traces/query/query.go @@ -601,7 +601,6 @@ func GetServiceGraphList(ctx context.Context, cp *CommonParams, param *ServiceGr var rowsLock sync.Mutex var rows []*Row - //var missingTimeColumn atomic.Bool writeBlock := func(_ uint, db *logstorage.DataBlock) { columns := db.Columns if len(columns) == 0 { @@ -644,12 +643,13 @@ func GetServiceGraphList(ctx context.Context, cp *CommonParams, param *ServiceGr return rows, nil } -// GetServiceGraphTimeRange calculate the service graph relation within the time range in (parent, child, callCount) format. -func GetServiceGraphTimeRange(ctx context.Context, r *http.Request, startTime, endTime time.Time, limit uint64) ([][]logstorage.Field, error) { - cp, err := GetCommonParams(r) - if err != nil { - return nil, err +// GetServiceGraphTimeRange is an internal function used by service graph background task. +// It calculates the service graph relation within the time range in (parent, child, callCount) format for specific tenant. 
+func GetServiceGraphTimeRange(ctx context.Context, tenantID logstorage.TenantID, startTime, endTime time.Time, limit uint64) ([][]logstorage.Field, error) { + cp := &CommonParams{ + TenantIDs: []logstorage.TenantID{tenantID}, } + // (NOT parent_span_id:"") AND (kind:~"2|5") | fields parent_span_id, resource_attr:service.name | rename parent_span_id as span_id, resource_attr:service.name as child qStrChildSpans := fmt.Sprintf( `(NOT %s:"") AND (%s:~"%d|%d") | fields %s, %s | rename %s as %s, %s as %s`, @@ -702,7 +702,6 @@ func GetServiceGraphTimeRange(ctx context.Context, r *http.Request, startTime, e var rowsLock sync.Mutex var rows [][]logstorage.Field - //var missingTimeColumn atomic.Bool writeBlock := func(_ uint, db *logstorage.DataBlock) { columns := db.Columns if len(columns) == 0 { @@ -721,7 +720,7 @@ func GetServiceGraphTimeRange(ctx context.Context, r *http.Request, startTime, e } for i := 0; i < valuesCount; i++ { fields := make([]logstorage.Field, 0, len(columns)) - for j := range columns { + for j := range clonedColumnNames { fields = append( fields, logstorage.Field{ diff --git a/docs/victoriatraces/querying/README.md b/docs/victoriatraces/querying/README.md index 16cc29244..2a913a4f4 100644 --- a/docs/victoriatraces/querying/README.md +++ b/docs/victoriatraces/querying/README.md @@ -135,13 +135,14 @@ Here's a response example: > This feature is **experimental**. To enable dependencies visualization, you **must** set `-servicegraph.enableTask` to `true` on VictoriaTraces single-node or > vtstorage to run the background task, which generates service graph data periodically. See also: `-servicegraph.*` flags. -The dependency graph is available at the `/select/jaeger/api/dependencies` HTTP endpoint. +The dependencies graph is available at the `/select/jaeger/api/dependencies` HTTP endpoint, which implements the +[Jaeger service dependencies graph API](https://www.jaegertracing.io/docs/2.10/architecture/apis/#service-dependencies-graph).
This endpoint provides the following params: - `endTs`: the end timestamp in unix milliseconds. Current timestamp will be used if empty. - `lookback`: the lookbehind window duration in milliseconds. Default to `1h` if empty. -Here are examples of the dependency API: +Here are examples of the dependencies API: 1. Show dependencies within a time range: ```sh From 6d82e1f7752ca13895b537bb1ba5dbdeedeb0f91 Mon Sep 17 00:00:00 2001 From: Jiekun Date: Tue, 7 Oct 2025 16:37:05 +0800 Subject: [PATCH 24/26] feature: [dependency] apply review suggestions --- app/victoria-traces/servicegraph/servicegraph.go | 4 +--- app/vtinsert/opentelemetry/opentelemetry.go | 8 ++++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/app/victoria-traces/servicegraph/servicegraph.go b/app/victoria-traces/servicegraph/servicegraph.go index 7da6df1f6..60f57cad1 100644 --- a/app/victoria-traces/servicegraph/servicegraph.go +++ b/app/victoria-traces/servicegraph/servicegraph.go @@ -3,8 +3,6 @@ package servicegraph import ( "context" "flag" - "net/http" - "strconv" "time" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" @@ -95,7 +93,7 @@ func GenerateServiceGraphTimeRange(ctx context.Context) { } // persist service graph relations - err = vtinsert.PersistServiceGraph(ctx, r, rows, endTime) + err = vtinsert.PersistServiceGraph(ctx, tenantID, rows, endTime) if err != nil { logger.Errorf("cannot presist service graph for time range [%d, %d]: %s", startTime.Unix(), endTime.Unix(), err) } diff --git a/app/vtinsert/opentelemetry/opentelemetry.go b/app/vtinsert/opentelemetry/opentelemetry.go index c0f9b1c3e..e011df515 100644 --- a/app/vtinsert/opentelemetry/opentelemetry.go +++ b/app/vtinsert/opentelemetry/opentelemetry.go @@ -286,10 +286,10 @@ func appendKeyValuesWithPrefixSuffix(fields []logstorage.Field, kvs []*otelpb.Ke return fields } -func PersistServiceGraph(ctx context.Context, r *http.Request, fields [][]logstorage.Field, timestamp time.Time) error { - cp, err := 
insertutil.GetCommonParams(r) - if err != nil { - return err +func PersistServiceGraph(ctx context.Context, tenantID logstorage.TenantID, fields [][]logstorage.Field, timestamp time.Time) error { + cp := insertutil.CommonParams{ + TenantID: tenantID, + TimeFields: []string{"_time"}, } lmp := cp.NewLogMessageProcessor("internalinsert_servicegraph", false) From 1b69ebab802d40f49da8fa7ec56b352ef9a33a0b Mon Sep 17 00:00:00 2001 From: Zhu Jiekun Date: Tue, 7 Oct 2025 16:39:12 +0800 Subject: [PATCH 25/26] Update docs/victoriatraces/querying/README.md Co-authored-by: Max Kotliar --- docs/victoriatraces/querying/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/victoriatraces/querying/README.md b/docs/victoriatraces/querying/README.md index 2a913a4f4..8fea75707 100644 --- a/docs/victoriatraces/querying/README.md +++ b/docs/victoriatraces/querying/README.md @@ -49,7 +49,7 @@ VictoriaTraces provides the following Jaeger HTTP endpoints: - `/select/jaeger/api/services` for querying all the services - `/select/jaeger/api/services/{service_name}/operations` for querying all the span names of a service. - `/select/jaeger/api/traces/{trace_id}` for querying a trace. -- `/select/jaeger/api/dependencies` for querying dependencies graph for services. +- `/select/jaeger/api/dependencies` for querying the service dependency graph. - `/select/jaeger/api/traces` for querying traces. 
The `/select/jaeger/api/traces` HTTP endpoint provides the following params: From bac80215cc6379acd0f9b5f23a0d672f6a2bc731 Mon Sep 17 00:00:00 2001 From: Zhu Jiekun Date: Tue, 7 Oct 2025 16:42:49 +0800 Subject: [PATCH 26/26] Update docs/victoriatraces/changelog/CHANGELOG.md Co-authored-by: Max Kotliar --- docs/victoriatraces/changelog/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/victoriatraces/changelog/CHANGELOG.md b/docs/victoriatraces/changelog/CHANGELOG.md index c28e6be7e..a33a4ab2a 100644 --- a/docs/victoriatraces/changelog/CHANGELOG.md +++ b/docs/victoriatraces/changelog/CHANGELOG.md @@ -19,7 +19,7 @@ The following `tip` changes can be tested by building VictoriaTraces components * BUGFIX: all components: restore sorting order of summary and quantile metrics exposed by VictoriaTraces components on `/metrics` page. See [metrics#105](https://github.com/VictoriaMetrics/metrics/pull/105) for details. -* FEATURE: [Single-node VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) and vtselect in [VictoriaTraces cluster](https://docs.victoriametrics.com/victoriatraces/cluster/): (experimental) support Jaeger [service dependencies graph API](https://www.jaegertracing.io/docs/2.10/architecture/apis/#service-dependencies-graph). See [this pull request](https://github.com/VictoriaMetrics/VictoriaTraces/pull/52) for details. +* FEATURE: [Single-node VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) and [VictoriaTraces cluster](https://docs.victoriametrics.com/victoriatraces/cluster/): (experimental) support Jaeger [service dependencies graph API](https://www.jaegertracing.io/docs/2.10/architecture/apis/#service-dependencies-graph). It requires `--servicegraph.enableTask=true` flag to be set on Single-node VictoriaTraces or each vtstorage instance. See [#52](https://github.com/VictoriaMetrics/VictoriaTraces/pull/52) for details. 
## [v0.3.0](https://github.com/VictoriaMetrics/VictoriaTraces/releases/tag/v0.3.0)