feature: implemented jaeger service dependencies API #52
@@ -0,0 +1,101 @@
package servicegraph

import (
	"context"
	"flag"
	"time"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"

	vtinsert "github.com/VictoriaMetrics/VictoriaTraces/app/vtinsert/opentelemetry"
	vtselect "github.com/VictoriaMetrics/VictoriaTraces/app/vtselect/traces/query"
	"github.com/VictoriaMetrics/VictoriaTraces/app/vtstorage"
)

var (
	enableServiceGraphTask     = flag.Bool("servicegraph.enableTask", false, "Whether to enable the background task for generating the service graph. It should only be enabled on VictoriaTraces single-node or vtstorage.")
	serviceGraphTaskInterval   = flag.Duration("servicegraph.taskInterval", time.Minute, "The background task interval for generating service graph data. It requires setting -servicegraph.enableTask=true.")
	serviceGraphTaskTimeout    = flag.Duration("servicegraph.taskTimeout", 30*time.Second, "The background task timeout for generating service graph data. It requires setting -servicegraph.enableTask=true.")
	serviceGraphTaskLookbehind = flag.Duration("servicegraph.taskLookbehind", time.Minute, "The lookbehind window for each service graph background task run. It requires setting -servicegraph.enableTask=true.")
	serviceGraphTaskLimit      = flag.Uint64("servicegraph.taskLimit", 1000, "How many service graph relations each task can fetch per tenant. It requires setting -servicegraph.enableTask=true.")
)

var (
	sgt *serviceGraphTask
)

func Init() {
	if *enableServiceGraphTask {
		sgt = newServiceGraphTask()
		sgt.Start()
	}
}

func Stop() {
	if *enableServiceGraphTask {
		sgt.Stop()
	}
}

type serviceGraphTask struct {
	stopCh chan struct{}
}

func newServiceGraphTask() *serviceGraphTask {
	return &serviceGraphTask{
		stopCh: make(chan struct{}),
	}
}

func (sgt *serviceGraphTask) Start() {
	logger.Infof("starting servicegraph background task, interval: %v, lookbehind: %v", *serviceGraphTaskInterval, *serviceGraphTaskLookbehind)
	go func() {
		ticker := time.NewTicker(*serviceGraphTaskInterval)
		defer ticker.Stop()

		for {
			select {
			case <-sgt.stopCh:
				return
			case <-ticker.C:
				ctx, cancelFunc := context.WithTimeout(context.Background(), *serviceGraphTaskTimeout)
				GenerateServiceGraphTimeRange(ctx)
				cancelFunc()
			}
		}
	}()
}

func (sgt *serviceGraphTask) Stop() {
	close(sgt.stopCh)
}

func GenerateServiceGraphTimeRange(ctx context.Context) {
	endTime := time.Now().Truncate(*serviceGraphTaskInterval)
	startTime := endTime.Add(-*serviceGraphTaskLookbehind)

	tenantIDs, err := vtstorage.GetTenantIDsByTimeRange(ctx, startTime.UnixNano(), endTime.UnixNano())
	if err != nil {
		logger.Errorf("cannot get tenant ids: %s", err)
		return
	}

	// Query and persist operations are executed sequentially, which helps avoid consuming excessive resources.
	for _, tenantID := range tenantIDs {
		// query service graph relations
		rows, err := vtselect.GetServiceGraphTimeRange(ctx, tenantID, startTime, endTime, *serviceGraphTaskLimit)
		if err != nil {
			logger.Errorf("cannot get service graph for time range [%d, %d]: %s", startTime.Unix(), endTime.Unix(), err)
			return
		}
		if len(rows) == 0 {
			return
		}

		// persist service graph relations
		err = vtinsert.PersistServiceGraph(ctx, tenantID, rows, endTime)
		if err != nil {
			logger.Errorf("cannot persist service graph for time range [%d, %d]: %s", startTime.Unix(), endTime.Unix(), err)
		}
	}
}
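For illustration, below is a minimal, self-contained sketch (not part of the PR) of the window arithmetic used by GenerateServiceGraphTimeRange, assuming the default one-minute values for -servicegraph.taskInterval and -servicegraph.taskLookbehind; computeWindow is a hypothetical helper introduced only for this example.

package main

import (
	"fmt"
	"time"
)

// computeWindow mirrors the window arithmetic in GenerateServiceGraphTimeRange:
// the end of the window is aligned down to a multiple of the task interval,
// and the start reaches back by the lookbehind duration.
func computeWindow(now time.Time, interval, lookbehind time.Duration) (time.Time, time.Time) {
	end := now.Truncate(interval)
	start := end.Add(-lookbehind)
	return start, end
}

func main() {
	now := time.Date(2024, 5, 1, 12, 34, 56, 0, time.UTC)
	start, end := computeWindow(now, time.Minute, time.Minute)
	// With the defaults, a run at 12:34:56 covers [12:33:00, 12:34:00).
	fmt.Println(start.Format(time.RFC3339), end.Format(time.RFC3339))
}

Aligning the end of the window to the interval keeps consecutive runs from producing overlapping or gapped windows as long as the lookbehind equals the interval.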
@@ -68,6 +68,7 @@ func main() {
	// The traceIDMap recorded old traceID->new traceID.
	// Spans with same old traceID should be replaced with same new traceID.
	traceIDMap := make(map[string]string)
	spanIDMap := make(map[string]string)

	// The timeOffset is the time offset between span timestamps and the current timestamp.
	// All spans' timestamps should be increased by this offset.

@@ -115,6 +116,26 @@ func main() {
				}
			}

			// replace SpanID
			if sid, ok := spanIDMap[sp.SpanID]; ok {
				sp.SpanID = sid
			} else {
				spanID := generateSpanID()
				oldSpanID := sp.SpanID
				sp.SpanID = spanID
				spanIDMap[oldSpanID] = spanID
			}

			// replace parentSpanID
			if sid, ok := spanIDMap[sp.ParentSpanID]; ok {
				sp.ParentSpanID = sid
			} else {
				parentSpanID := generateSpanID()
				oldParentSpanID := sp.ParentSpanID
				sp.ParentSpanID = parentSpanID
				spanIDMap[oldParentSpanID] = parentSpanID
			}

			// adjust the timestamp of the span.
			sp.StartTimeUnixNano = sp.StartTimeUnixNano + timeOffset
			sp.EndTimeUnixNano = sp.EndTimeUnixNano + timeOffset + uint64(rand.Int63n(100000000))

@@ -198,12 +219,27 @@ func loadTestData() [][]byte {
	return bodyList
}

var traceIDMutex sync.Mutex

func generateTraceID() string {
	traceIDMutex.Lock()
	defer traceIDMutex.Unlock()

	h := md5.New()
	h.Write([]byte(strconv.FormatInt(time.Now().UnixNano(), 10)))
	return hex.EncodeToString(h.Sum(nil))
}

var spanIDMutex sync.Mutex

func generateSpanID() string {
	spanIDMutex.Lock()
	defer spanIDMutex.Unlock()
	h := md5.New()
	h.Write([]byte(strconv.FormatInt(time.Now().UnixNano(), 10)))
	return hex.EncodeToString(h.Sum(nil))[:16]
}

Review thread on generateSpanID:
Comment: Could the function be called concurrently? What would happen if it produced the same span ID?
Reply: I see. This is not strong enough. Identical traceID resulted in incorrect relations.
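As a hedged sketch of one way to address the collision concern raised in the thread above (not what the PR implements), the IDs could be derived from crypto/rand instead of an MD5 of the current timestamp; the randomID helper below is hypothetical.

package main

import (
	"crypto/rand"
	"encoding/hex"
	"fmt"
)

// randomID returns n random bytes, hex-encoded. Unlike hashing time.Now(),
// it cannot produce duplicates just because two calls observe the same
// clock reading, and it needs no mutex since crypto/rand.Read is safe for
// concurrent use.
func randomID(n int) string {
	b := make([]byte, n)
	if _, err := rand.Read(b); err != nil {
		panic(err) // a failing system randomness source is not recoverable here
	}
	return hex.EncodeToString(b)
}

func generateTraceID() string { return randomID(16) } // 32 hex chars
func generateSpanID() string  { return randomID(8) }  // 16 hex chars

func main() {
	fmt.Println(generateTraceID())
	fmt.Println(generateSpanID())
}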

// readWrite does the following:
// 1. reads request body binary files like `1.bin`, `2.bin` and puts them into `BodyList`.
// 2. encodes and compresses the `BodyList` into `[]byte`.
Review comment on the lines above: I don’t think this comment adds much value; consider removing it or adding more context.