From 9b29558585b71df6ed92cff1bad2697bfd047a0a Mon Sep 17 00:00:00 2001 From: Namkyu Park <53862866+namkyu1999@users.noreply.github.com> Date: Wed, 15 Jan 2025 20:03:43 +0900 Subject: [PATCH 1/4] feat: export k6 results output to the OTEL collector (#726) * Export k6 results to the otel collector Signed-off-by: namkyu1999 * add envs for multiple projects Signed-off-by: namkyu1999 --------- Signed-off-by: namkyu1999 Co-authored-by: Shubham Chaudhary Co-authored-by: Saranya Jena --- chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go | 38 ++++++++++++++++--- .../k6-loadgen/environment/environment.go | 2 +- pkg/load/k6-loadgen/types/types.go | 1 + 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go b/chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go index 1de1d862b..79ce56b30 100644 --- a/chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go +++ b/chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go @@ -3,6 +3,7 @@ package lib import ( "context" "fmt" + "os" "strconv" "github.com/litmuschaos/litmus-go/pkg/cerrors" @@ -103,6 +104,35 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex const volumeName = "script-volume" const mountPath = "/mnt" + + var envs []corev1.EnvVar + args := []string{ + mountPath + "/" + experimentsDetails.ScriptSecretKey, + "-q", + "--duration", + strconv.Itoa(experimentsDetails.ChaosDuration) + "s", + "--tag", + "trace_id=" + span.SpanContext().TraceID().String(), + } + + if otelExporterEndpoint := os.Getenv(telemetry.OTELExporterOTLPEndpoint); otelExporterEndpoint != "" { + envs = []corev1.EnvVar{ + { + Name: "K6_OTEL_METRIC_PREFIX", + Value: experimentsDetails.OTELMetricPrefix, + }, + { + Name: "K6_OTEL_GRPC_EXPORTER_INSECURE", + Value: "true", + }, + { + Name: "K6_OTEL_GRPC_EXPORTER_ENDPOINT", + Value: otelExporterEndpoint, + }, + } + args = append(args, "--out", "experimental-opentelemetry") + } + helperPod := &corev1.Pod{ ObjectMeta: v1.ObjectMeta{ GenerateName: experimentsDetails.ExperimentName + "-helper-", @@ -122,12 +152,8 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex "k6", "run", }, - Args: []string{ - mountPath + "/" + experimentsDetails.ScriptSecretKey, - "-q", - "--duration", - strconv.Itoa(experimentsDetails.ChaosDuration) + "s", - }, + Args: args, + Env: envs, Resources: chaosDetails.Resources, VolumeMounts: []corev1.VolumeMount{ { diff --git a/pkg/load/k6-loadgen/environment/environment.go b/pkg/load/k6-loadgen/environment/environment.go index 48ac8df28..aa7507f16 100644 --- a/pkg/load/k6-loadgen/environment/environment.go +++ b/pkg/load/k6-loadgen/environment/environment.go @@ -25,5 +25,5 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails) { experimentDetails.LIBImage = types.Getenv("LIB_IMAGE", "ghcr.io/grafana/k6-operator:latest-runner") experimentDetails.ScriptSecretName = types.Getenv("SCRIPT_SECRET_NAME", "k6-script") experimentDetails.ScriptSecretKey = types.Getenv("SCRIPT_SECRET_KEY", "script.js") - + experimentDetails.OTELMetricPrefix = types.Getenv("OTEL_METRIC_PREFIX", "k6_") } diff --git a/pkg/load/k6-loadgen/types/types.go b/pkg/load/k6-loadgen/types/types.go index 915f8a8d5..9b6d6877f 100644 --- a/pkg/load/k6-loadgen/types/types.go +++ b/pkg/load/k6-loadgen/types/types.go @@ -18,4 +18,5 @@ type ExperimentDetails struct { LIBImage string ScriptSecretName string ScriptSecretKey string + OTELMetricPrefix string } From 8246ff891bb433897e13deed7fa3ef10601cf7da Mon Sep 17 00:00:00 2001 From: Suhyen Im Date: Wed, 15 Jan 2025 20:04:19 +0900 Subject: [PATCH 2/4] feat: propagate trace context to helper pods (#722) Signed-off-by: Suhyen Im Co-authored-by: Shubham Chaudhary Co-authored-by: Saranya Jena --- bin/helper/helper.go | 14 +++++++------- .../litmus/container-kill/helper/container-kill.go | 6 +++++- chaoslib/litmus/disk-fill/helper/disk-fill.go | 6 +++++- chaoslib/litmus/http-chaos/helper/http-helper.go | 7 ++++++- chaoslib/litmus/network-chaos/helper/netem.go | 7 ++++++- chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go | 7 ++++++- .../litmus/stress-chaos/helper/stress-helper.go | 7 ++++++- 7 files changed, 41 insertions(+), 13 deletions(-) diff --git a/bin/helper/helper.go b/bin/helper/helper.go index 667c79293..c2774a131 100644 --- a/bin/helper/helper.go +++ b/bin/helper/helper.go @@ -54,7 +54,7 @@ func main() { clients := cli.ClientSets{} - _, span := otel.Tracer(telemetry.TracerName).Start(ctx, "ExecuteExperimentHelper") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "ExecuteExperimentHelper") defer span.End() // parse the helper name @@ -71,17 +71,17 @@ func main() { // invoke the corresponding helper based on the the (-name) flag switch *helperName { case "container-kill": - containerKill.Helper(clients) + containerKill.Helper(ctx, clients) case "disk-fill": - diskFill.Helper(clients) + diskFill.Helper(ctx, clients) case "dns-chaos": - dnsChaos.Helper(clients) + dnsChaos.Helper(ctx, clients) case "stress-chaos": - stressChaos.Helper(clients) + stressChaos.Helper(ctx, clients) case "network-chaos": - networkChaos.Helper(clients) + networkChaos.Helper(ctx, clients) case "http-chaos": - httpChaos.Helper(clients) + httpChaos.Helper(ctx, clients) default: log.Errorf("Unsupported -name %v, please provide the correct value of -name args", *helperName) diff --git a/chaoslib/litmus/container-kill/helper/container-kill.go b/chaoslib/litmus/container-kill/helper/container-kill.go index 6ea335467..81e6b1a67 100644 --- a/chaoslib/litmus/container-kill/helper/container-kill.go +++ b/chaoslib/litmus/container-kill/helper/container-kill.go @@ -4,6 +4,8 @@ import ( "bytes" "context" "fmt" + "github.com/litmuschaos/litmus-go/pkg/telemetry" + "go.opentelemetry.io/otel" "os/exec" "strconv" "time" @@ -27,7 +29,9 @@ import ( var err error // Helper injects the container-kill chaos -func Helper(clients clients.ClientSets) { +func Helper(ctx context.Context, clients clients.ClientSets) { + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "SimulateContainerKillFault") + defer span.End() experimentsDetails := experimentTypes.ExperimentDetails{} eventsDetails := types.EventDetails{} diff --git a/chaoslib/litmus/disk-fill/helper/disk-fill.go b/chaoslib/litmus/disk-fill/helper/disk-fill.go index 0ebaf03be..c851ba26f 100644 --- a/chaoslib/litmus/disk-fill/helper/disk-fill.go +++ b/chaoslib/litmus/disk-fill/helper/disk-fill.go @@ -4,7 +4,9 @@ import ( "context" "fmt" "github.com/litmuschaos/litmus-go/pkg/cerrors" + "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" + "go.opentelemetry.io/otel" "os" "os/exec" "os/signal" @@ -29,7 +31,9 @@ import ( var inject, abort chan os.Signal // Helper injects the disk-fill chaos -func Helper(clients clients.ClientSets) { +func Helper(ctx context.Context, clients clients.ClientSets) { + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "SimulateDiskFillFault") + defer span.End() experimentsDetails := experimentTypes.ExperimentDetails{} eventsDetails := types.EventDetails{} diff --git a/chaoslib/litmus/http-chaos/helper/http-helper.go b/chaoslib/litmus/http-chaos/helper/http-helper.go index e5a0bd29c..b544df448 100644 --- a/chaoslib/litmus/http-chaos/helper/http-helper.go +++ b/chaoslib/litmus/http-chaos/helper/http-helper.go @@ -1,9 +1,12 @@ package helper import ( + "context" "fmt" "github.com/litmuschaos/litmus-go/pkg/cerrors" + "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" + "go.opentelemetry.io/otel" "os" "os/signal" "strconv" @@ -27,7 +30,9 @@ var ( ) // Helper injects the http chaos -func Helper(clients clients.ClientSets) { +func Helper(ctx context.Context, clients clients.ClientSets) { + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "SimulatePodHTTPFault") + defer span.End() experimentsDetails := experimentTypes.ExperimentDetails{} eventsDetails := types.EventDetails{} diff --git a/chaoslib/litmus/network-chaos/helper/netem.go b/chaoslib/litmus/network-chaos/helper/netem.go index 415039c44..b5d200c24 100644 --- a/chaoslib/litmus/network-chaos/helper/netem.go +++ b/chaoslib/litmus/network-chaos/helper/netem.go @@ -1,10 +1,13 @@ package helper import ( + "context" "fmt" "github.com/litmuschaos/litmus-go/pkg/cerrors" "github.com/litmuschaos/litmus-go/pkg/events" + "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" + "go.opentelemetry.io/otel" "os" "os/exec" "os/signal" @@ -34,7 +37,9 @@ var ( ) // Helper injects the network chaos -func Helper(clients clients.ClientSets) { +func Helper(ctx context.Context, clients clients.ClientSets) { + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "SimulatePodNetworkFault") + defer span.End() experimentsDetails := experimentTypes.ExperimentDetails{} eventsDetails := types.EventDetails{} diff --git a/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go b/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go index 794da032e..04b895f9b 100644 --- a/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go +++ b/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go @@ -2,9 +2,12 @@ package helper import ( "bytes" + "context" "fmt" "github.com/litmuschaos/litmus-go/pkg/cerrors" + "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" + "go.opentelemetry.io/otel" "os" "os/exec" "os/signal" @@ -34,7 +37,9 @@ const ( ) // Helper injects the dns chaos -func Helper(clients clients.ClientSets) { +func Helper(ctx context.Context, clients clients.ClientSets) { + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "SimulatePodDNSFault") + defer span.End() experimentsDetails := experimentTypes.ExperimentDetails{} eventsDetails := types.EventDetails{} diff --git a/chaoslib/litmus/stress-chaos/helper/stress-helper.go b/chaoslib/litmus/stress-chaos/helper/stress-helper.go index e879b2d02..9d4e3a66d 100644 --- a/chaoslib/litmus/stress-chaos/helper/stress-helper.go +++ b/chaoslib/litmus/stress-chaos/helper/stress-helper.go @@ -3,9 +3,12 @@ package helper import ( "bufio" "bytes" + "context" "fmt" "github.com/litmuschaos/litmus-go/pkg/cerrors" + "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" + "go.opentelemetry.io/otel" "io" "os" "os/exec" @@ -51,7 +54,9 @@ const ( ) // Helper injects the stress chaos -func Helper(clients clients.ClientSets) { +func Helper(ctx context.Context, clients clients.ClientSets) { + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "SimulatePodStressFault") + defer span.End() experimentsDetails := experimentTypes.ExperimentDetails{} eventsDetails := types.EventDetails{} From 34a62d87f32ae898dce6666ca4106ad7329cf86c Mon Sep 17 00:00:00 2001 From: kbfu Date: Fri, 17 Jan 2025 12:59:30 +0700 Subject: [PATCH 3/4] fix the cgroup 2 problem (#677) Co-authored-by: Shubham Chaudhary --- .../stress-chaos/helper/stress-helper.go | 52 +++++++++++++------ 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/chaoslib/litmus/stress-chaos/helper/stress-helper.go b/chaoslib/litmus/stress-chaos/helper/stress-helper.go index 9d4e3a66d..c2f62bcc2 100644 --- a/chaoslib/litmus/stress-chaos/helper/stress-helper.go +++ b/chaoslib/litmus/stress-chaos/helper/stress-helper.go @@ -110,7 +110,9 @@ func prepareStressChaos(experimentsDetails *experimentTypes.ExperimentDetails, c return stacktrace.Propagate(err, "could not parse targets") } - var targets []targetDetails + var ( + targets []targetDetails + ) for _, t := range targetList.Target { td := targetDetails{ @@ -131,7 +133,7 @@ func prepareStressChaos(experimentsDetails *experimentTypes.ExperimentDetails, c return stacktrace.Propagate(err, "could not get container pid") } - td.CGroupManager, err = getCGroupManager(td) + td.CGroupManager, err, td.GroupPath = getCGroupManager(td) if err != nil { return stacktrace.Propagate(err, "could not get cgroup manager") } @@ -499,43 +501,61 @@ func abortWatcher(targets []targetDetails, resultName, chaosNS string) { } // getCGroupManager will return the cgroup for the given pid of the process -func getCGroupManager(t targetDetails) (interface{}, error) { +func getCGroupManager(t targetDetails) (interface{}, error, string) { if cgroups.Mode() == cgroups.Unified { - groupPath, err := cgroupsv2.PidGroupPath(t.Pid) + groupPath := "" + output, err := exec.Command("bash", "-c", fmt.Sprintf("nsenter -t 1 -C -m -- cat /proc/%v/cgroup", t.Pids[index])).CombinedOutput() if err != nil { - return nil, cerrors.Error{ErrorCode: cerrors.ErrorTypeHelper, Source: t.Source, Target: fmt.Sprintf("{podName: %s, namespace: %s, container: %s}", t.Name, t.Namespace, t.TargetContainer), Reason: fmt.Sprintf("fail to get pid group path: %s", err.Error())} + return nil, errors.Errorf("Error in getting groupPath,%s", string(output)), "" + } + parts := strings.SplitN(string(output), ":", 3) + if len(parts) < 3 { + return "", fmt.Errorf("invalid cgroup entry: %s", string(output)), "" + } + if parts[0] == "0" && parts[1] == "" { + groupPath = parts[2] } - cgroup2, err := cgroupsv2.LoadManager("/sys/fs/cgroup", groupPath) + log.Infof("group path: %s", groupPath) + + cgroup2, err := cgroupsv2.LoadManager("/sys/fs/cgroup", string(groupPath)) if err != nil { - return nil, cerrors.Error{ErrorCode: cerrors.ErrorTypeHelper, Source: t.Source, Target: fmt.Sprintf("{podName: %s, namespace: %s, container: %s}", t.Name, t.Namespace, t.TargetContainer), Reason: fmt.Sprintf("fail to load the cgroup: %s", err.Error())} + return nil, errors.Errorf("Error loading cgroup v2 manager, %v", err), "" } - return cgroup2, nil + return cgroup2, nil, groupPath } path := pidPath(t) cgroup, err := findValidCgroup(path, t) if err != nil { - return nil, stacktrace.Propagate(err, "could not find valid cgroup") + return nil, stacktrace.Propagate(err, "could not find valid cgroup"), "" } cgroup1, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(cgroup)) if err != nil { - return nil, cerrors.Error{ErrorCode: cerrors.ErrorTypeHelper, Source: t.Source, Target: fmt.Sprintf("{podName: %s, namespace: %s, container: %s}", t.Name, t.Namespace, t.TargetContainer), Reason: fmt.Sprintf("fail to load the cgroup: %s", err.Error())} + return nil, cerrors.Error{ErrorCode: cerrors.ErrorTypeHelper, Source: t.Source, Target: fmt.Sprintf("{podName: %s, namespace: %s, container: %s}", t.Name, t.Namespace, t.TargetContainer), Reason: fmt.Sprintf("fail to load the cgroup: %s", err.Error())}, "" } - return cgroup1, nil + return cgroup1, nil, "" } // addProcessToCgroup will add the process to cgroup // By default it will add to v1 cgroup -func addProcessToCgroup(pid int, control interface{}) error { +func addProcessToCgroup(pid int, control interface{}, groupPath string) error { if cgroups.Mode() == cgroups.Unified { - var cgroup1 = control.(*cgroupsv2.Manager) - return cgroup1.AddProc(uint64(pid)) + args := []string{"-t", "1", "-C", "--", "sudo", "sh", "-c", fmt.Sprintf("echo %d >> /sys/fs/cgroup%s/cgroup.procs", pid, strings.ReplaceAll(groupPath, "\n", ""))} + output, err := exec.Command("nsenter", args...).CombinedOutput() + if err != nil { + return cerrors.Error{ + ErrorCode: cerrors.ErrorTypeChaosInject, + Reason: fmt.Sprintf("failed to add process to cgroup %s: %v", string(output), err), + } + } + return nil } var cgroup1 = control.(cgroups.Cgroup) return cgroup1.Add(cgroups.Process{Pid: pid}) } + func injectChaos(t targetDetails, stressors, stressType string) (*exec.Cmd, error) { stressCommand := fmt.Sprintf("pause nsutil -t %v -p -- %v", strconv.Itoa(t.Pid), stressors) // for io stress,we need to enter into mount ns of the target container @@ -543,6 +563,7 @@ func injectChaos(t targetDetails, stressors, stressType string) (*exec.Cmd, erro if stressType == "pod-io-stress" { stressCommand = fmt.Sprintf("pause nsutil -t %v -p -m -- %v", strconv.Itoa(t.Pid), stressors) } + log.Infof("[Info]: starting process: %v", stressCommand) // launch the stress-ng process on the target container in paused mode @@ -556,7 +577,7 @@ func injectChaos(t targetDetails, stressors, stressType string) (*exec.Cmd, erro } // add the stress process to the cgroup of target container - if err = addProcessToCgroup(cmd.Process.Pid, t.CGroupManager); err != nil { + if err = addProcessToCgroup(cmd.Process.Pid, t.CGroupManager, t.GroupPath); err != nil { if killErr := cmd.Process.Kill(); killErr != nil { return nil, cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Source: t.Source, Target: fmt.Sprintf("{podName: %s, namespace: %s, container: %s}", t.Name, t.Namespace, t.TargetContainer), Reason: fmt.Sprintf("fail to add the stress process to cgroup %s and kill stress process: %s", err.Error(), killErr.Error())} } @@ -584,4 +605,5 @@ type targetDetails struct { CGroupManager interface{} Cmd *exec.Cmd Source string + GroupPath string } From caae228e354a9b744ad24b9c7a82b0279c1cced3 Mon Sep 17 00:00:00 2001 From: Shubham Chaudhary Date: Fri, 17 Jan 2025 12:08:34 +0530 Subject: [PATCH 4/4] (chore): fix the go fmt of the files (#734) Signed-off-by: Shubham Chaudhary --- chaoslib/litmus/stress-chaos/helper/stress-helper.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/chaoslib/litmus/stress-chaos/helper/stress-helper.go b/chaoslib/litmus/stress-chaos/helper/stress-helper.go index c2f62bcc2..dd310f07a 100644 --- a/chaoslib/litmus/stress-chaos/helper/stress-helper.go +++ b/chaoslib/litmus/stress-chaos/helper/stress-helper.go @@ -111,7 +111,7 @@ func prepareStressChaos(experimentsDetails *experimentTypes.ExperimentDetails, c } var ( - targets []targetDetails + targets []targetDetails ) for _, t := range targetList.Target { @@ -504,7 +504,7 @@ func abortWatcher(targets []targetDetails, resultName, chaosNS string) { func getCGroupManager(t targetDetails) (interface{}, error, string) { if cgroups.Mode() == cgroups.Unified { groupPath := "" - output, err := exec.Command("bash", "-c", fmt.Sprintf("nsenter -t 1 -C -m -- cat /proc/%v/cgroup", t.Pids[index])).CombinedOutput() + output, err := exec.Command("bash", "-c", fmt.Sprintf("nsenter -t 1 -C -m -- cat /proc/%v/cgroup", t.Pid)).CombinedOutput() if err != nil { return nil, errors.Errorf("Error in getting groupPath,%s", string(output)), "" } @@ -518,7 +518,7 @@ func getCGroupManager(t targetDetails) (interface{}, error, string) { log.Infof("group path: %s", groupPath) - cgroup2, err := cgroupsv2.LoadManager("/sys/fs/cgroup", string(groupPath)) + cgroup2, err := cgroupsv2.LoadManager("/sys/fs/cgroup", groupPath) if err != nil { return nil, errors.Errorf("Error loading cgroup v2 manager, %v", err), "" } @@ -541,7 +541,7 @@ func getCGroupManager(t targetDetails) (interface{}, error, string) { // By default it will add to v1 cgroup func addProcessToCgroup(pid int, control interface{}, groupPath string) error { if cgroups.Mode() == cgroups.Unified { - args := []string{"-t", "1", "-C", "--", "sudo", "sh", "-c", fmt.Sprintf("echo %d >> /sys/fs/cgroup%s/cgroup.procs", pid, strings.ReplaceAll(groupPath, "\n", ""))} + args := []string{"-t", "1", "-C", "--", "sudo", "sh", "-c", fmt.Sprintf("echo %d >> /sys/fs/cgroup%s/cgroup.procs", pid, strings.ReplaceAll(groupPath, "\n", ""))} output, err := exec.Command("nsenter", args...).CombinedOutput() if err != nil { return cerrors.Error{ @@ -555,7 +555,6 @@ func addProcessToCgroup(pid int, control interface{}, groupPath string) error { return cgroup1.Add(cgroups.Process{Pid: pid}) } - func injectChaos(t targetDetails, stressors, stressType string) (*exec.Cmd, error) { stressCommand := fmt.Sprintf("pause nsutil -t %v -p -- %v", strconv.Itoa(t.Pid), stressors) // for io stress,we need to enter into mount ns of the target container @@ -605,5 +604,5 @@ type targetDetails struct { CGroupManager interface{} Cmd *exec.Cmd Source string - GroupPath string + GroupPath string }