ebpf: reducing ebpf call overhead by using sampling instead of tracing every call (#928)

* ebpf: reducing ebpf call overhead by using sampling instead of tracing every call

Signed-off-by: Huamin Chen <[email protected]>

* review feedback: setting sample_rate to 1

Signed-off-by: Huamin Chen <[email protected]>

---------

Signed-off-by: Huamin Chen <[email protected]>
rootfs authored Sep 13, 2023
1 parent d8f89ea commit 5f33240
Showing 8 changed files with 58 additions and 2 deletions.
Binary file modified bpfassets/libbpf/bpf.o/amd64_kepler.bpf.o
13 changes: 12 additions & 1 deletion bpfassets/libbpf/src/kepler.bpf.c
@@ -44,6 +44,10 @@ BPF_ARRAY(cache_miss, u64);
// cpu freq counters
BPF_ARRAY(cpu_freq_array, u32);

// setting sample_rate or counter to 0 will make the compiler remove the code entirely.
int sample_rate = 1;
int counter = 1;

static inline u64 get_on_cpu_time(u32 cur_pid, u32 prev_pid, u64 cur_ts)
{
u64 cpu_time = 0;
@@ -207,6 +211,13 @@ SEC("tracepoint/sched/sched_switch")
int kepler_trace(struct sched_switch_args *ctx)
{
u32 next_pid = ctx->next_pid; // the new pid that is to be scheduled

if (counter > 0)
{
// countdown still running: skip this sched_switch event
counter--;
return 0;
}
// countdown expired: reset it and fall through to account this event
counter = sample_rate;
u32 cur_pid = bpf_get_current_pid_tgid();
u64 cgroup_id = bpf_get_current_cgroup_id(); // the cgroup id is the cgroup id of the running process (this is not next_pid or prev_pid)
u64 cur_ts = bpf_ktime_get_ns();
@@ -260,4 +271,4 @@ int kepler_irq_trace(struct trace_event_raw_softirq *ctx)
return 0;
}

-char _license[] SEC("license") = "GPL";
+char _license[] SEC("license") = "GPL";
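
The countdown above means that with sample_rate = N, only one of every N+1 sched_switch events runs the accounting code; the rest return immediately. A minimal Go model of the loop (illustrative only, not part of this commit) makes the ratio concrete:

package main

import "fmt"

// model of the countdown in kepler_trace: skip while counter > 0,
// then account one event and reset counter to sampleRate
func main() {
	sampleRate, counter := 2, 2
	traced, total := 0, 9
	for i := 0; i < total; i++ {
		if counter > 0 {
			counter-- // event skipped
			continue
		}
		counter = sampleRate
		traced++ // event accounted
	}
	fmt.Printf("traced %d of %d events\n", traced, total) // traced 3 of 9
}

With the default of sample_rate = 1 adopted from review feedback, every other switch event is traced, halving the number of events that run the accounting path.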
18 changes: 18 additions & 0 deletions bpfassets/perf_event/perf_event.c
@@ -71,6 +71,11 @@ BPF_ARRAY(cache_miss, u64, NUM_CPUS);
// cpu freq counters
BPF_ARRAY(cpu_freq_array, u32, NUM_CPUS);

#ifndef SAMPLE_RATE
#define SAMPLE_RATE 0
#endif
BPF_HASH(sample_rate, u32, u32);

static inline u64 get_on_cpu_time(pid_t cur_pid, u32 prev_pid, u32 cpu_id, u64 cur_ts)
{
u64 cpu_time = 0;
@@ -192,6 +197,19 @@ static inline u64 get_on_cpu_avg_freq(u32 *cpu_id, u64 on_cpu_cycles_delta, u64
// int kprobe__finish_task_switch(switch_args *ctx)
int kprobe__finish_task_switch(struct pt_regs *ctx, struct task_struct *prev)
{
u32 initial = SAMPLE_RATE, *sample_counter_value, sample_counter_key = 1234;
// only do sampling if sample rate is set
if (initial != 0) {
sample_counter_value = sample_rate.lookup_or_try_init(&sample_counter_key, &initial);
// null-check the lookup result before dereferencing it
if (sample_counter_value != 0) {
if (*sample_counter_value > 0) {
// countdown still running: skip this event
(*sample_counter_value)--;
return 0;
}
}
// countdown expired: reset it and fall through to trace this event
sample_rate.update(&sample_counter_key, &initial);
}

pid_t cur_pid = bpf_get_current_pid_tgid();
#ifdef SET_GROUP_ID
u64 cgroup_id = bpf_get_current_cgroup_id();
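
Unlike the libbpf program, the BCC program cannot receive a userspace-initialized global, so the countdown lives in a one-entry hash map keyed by an arbitrary constant, and the rate arrives as the -DSAMPLE_RATE compile flag. A hypothetical Go model of the same control flow, with the BPF hash map modeled as a Go map:

package main

import "fmt"

const sampleRateDefine = 2 // stands in for the -DSAMPLE_RATE build flag

var counters = map[uint32]uint32{} // models the one-entry BPF hash map

func shouldTrace() bool {
	const key = 1234
	if sampleRateDefine == 0 {
		return true // sampling disabled: trace every event
	}
	v, ok := counters[key]
	if !ok {
		v = sampleRateDefine // models lookup_or_try_init
	}
	if v > 0 {
		counters[key] = v - 1 // countdown running: skip this event
		return false
	}
	counters[key] = sampleRateDefine // reset and trace this event
	return true
}

func main() {
	for i := 0; i < 6; i++ {
		fmt.Println(i, shouldTrace()) // false false true false false true
	}
}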
3 changes: 2 additions & 1 deletion pkg/bpfassets/attacher/bcc_attacher.go
@@ -144,10 +144,11 @@ func attachBccModule() (*BccModuleTables, error) {
// so if /proc/cpuinfo is available, we can get the number of all CPUs
cores = int(cpu.TotalThreads)
}

bpfSampleRate := config.BPFSampleRate
options := []string{
"-DMAP_SIZE=" + strconv.Itoa(MapSize),
"-DNUM_CPUS=" + strconv.Itoa(cores),
"-DSAMPLE_RATE=" + strconv.Itoa(bpfSampleRate),
}
if config.EnabledEBPFCgroupID {
options = append(options, "-DSET_GROUP_ID")
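
For the BCC path the rate reaches the C source as a preprocessor define, so the #ifndef SAMPLE_RATE default of 0 only applies when the flag is absent. A minimal sketch of how such cflags are consumed, assuming gobpf's bcc.NewModule API (loadModule is a hypothetical helper; the surrounding attacher code is elided in this diff):

package attacher

import (
	"strconv"

	bpf "github.com/iovisor/gobpf/bcc"
)

// loadModule compiles the BCC source with the given cflags; the -D
// values become compile-time constants inside the BPF program
func loadModule(source string, cores, sampleRate int) *bpf.Module {
	options := []string{
		"-DNUM_CPUS=" + strconv.Itoa(cores),
		"-DSAMPLE_RATE=" + strconv.Itoa(sampleRate),
	}
	return bpf.NewModule(source, options)
}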
6 changes: 6 additions & 0 deletions pkg/bpfassets/attacher/libbpf_attacher.go
@@ -106,7 +106,13 @@ func attachLibbpfModule() (*bpf.Module, error) {
klog.Infof("failed to resize array %s: %v\n", arrayName, err)
}
}
// set the sample rate; this must be done before loading the object
sampleRate := config.BPFSampleRate

err = libbpfModule.InitGlobalVariable("sample_rate", int32(sampleRate))
if err != nil {
return nil, fmt.Errorf("failed to set sample rate: %v", err)
}
err = libbpfModule.BPFLoadObject()

// attach sched_switch tracepoint to kepler_trace function
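
The ordering constraint in the comment matters: libbpf patches a global's initial value into the object's data section, so it can only be set between opening and loading the object. A minimal sketch of the full sequence, assuming libbpfgo's API (program and tracepoint names taken from the diff above):

package attacher

import bpf "github.com/aquasecurity/libbpfgo"

func attachSampled(objPath string, sampleRate int32) (*bpf.Module, error) {
	m, err := bpf.NewModuleFromFile(objPath)
	if err != nil {
		return nil, err
	}
	// after open, before load: the value is written into the object's
	// data section before the kernel verifies the program
	if err := m.InitGlobalVariable("sample_rate", sampleRate); err != nil {
		return nil, err
	}
	if err := m.BPFLoadObject(); err != nil {
		return nil, err
	}
	prog, err := m.GetProgram("kepler_trace")
	if err != nil {
		return nil, err
	}
	if _, err := prog.AttachTracepoint("sched", "sched_switch"); err != nil {
		return nil, err
	}
	return m, nil
}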
15 changes: 15 additions & 0 deletions pkg/bpfassets/perf_event_bindata.go

Generated file; diff not rendered.

3 changes: 3 additions & 0 deletions pkg/collector/metric/container_metric.go
@@ -271,6 +271,9 @@ func (c *ContainerMetrics) UpdateCgroupMetrics() error {
}

func (c *ContainerMetrics) GetDynEnergyStat(component string) (energyStat *types.UInt64Stat) {
if c == nil {
return
}
switch component {
case PKG:
return c.DynEnergyInPkg
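
The added guard relies on a Go property that is easy to miss: a method with a pointer receiver may be called on a nil receiver, and only dereferencing the receiver panics. A standalone sketch (Metrics and Stat are hypothetical stand-ins for ContainerMetrics and types.UInt64Stat):

package main

import "fmt"

type Stat struct{ Aggr uint64 }

type Metrics struct{ pkg *Stat }

// Get is safe on a nil *Metrics: the guard returns the zero value
// before any field of m is touched
func (m *Metrics) Get() *Stat {
	if m == nil {
		return nil
	}
	return m.pkg
}

func main() {
	var m *Metrics        // nil pointer
	fmt.Println(m.Get()) // prints <nil>, no panic
}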
2 changes: 2 additions & 0 deletions pkg/config/config.go
@@ -83,6 +83,7 @@ var (
BindAddressKey = "BIND_ADDRESS"
CPUArchOverride = getConfig("CPU_ARCH_OVERRIDE", "")
MaxLookupRetry = getIntConfig("MAX_LOOKUP_RETRY", defaultMaxLookupRetry)
BPFSampleRate = getIntConfig("EXPERIMENTAL_BPF_SAMPLE_RATE", 0)

EstimatorModel = getConfig("ESTIMATOR_MODEL", defaultMetricValue) // auto-select
EstimatorSelectFilter = getConfig("ESTIMATOR_SELECT_FILTER", defaultMetricValue) // no filter
@@ -149,6 +150,7 @@ func logBoolConfigs() {
klog.V(5).Infof("EXPOSE_KUBELET_METRICS: %t", ExposeKubeletMetrics)
klog.V(5).Infof("EXPOSE_IRQ_COUNTER_METRICS: %t", ExposeIRQCounterMetrics)
klog.V(5).Infof("EXPOSE_ESTIMATED_IDLE_POWER_METRICS: %t. This only impacts when the power is estimated using pre-prained models. Estimated idle power is meaningful only when Kepler is running on bare-metal or with a single virtual machine (VM) on the node.", ExposeEstimatedIdlePowerMetrics)
klog.V(5).Infof("EXPERIMENTAL_BPF_SAMPLE_RATE: %t", BPFSampleRate)
}
}

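getIntConfig is not part of this diff; assuming it follows the pattern of the other getters, it reads the environment variable and falls back to the default, so sampling stays disabled (rate 0) unless EXPERIMENTAL_BPF_SAMPLE_RATE is set explicitly. A minimal sketch of the presumed helper:

package config

import (
	"os"
	"strconv"
)

// getIntConfig returns the integer value of the environment variable
// named key, or defaultValue if it is unset or not a number
func getIntConfig(key string, defaultValue int) int {
	if s, ok := os.LookupEnv(key); ok {
		if v, err := strconv.Atoi(s); err == nil {
			return v
		}
	}
	return defaultValue
}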
