Skip to content

Commit

Permalink
Refactor the code to remove duplications (#1089)
Browse files Browse the repository at this point in the history
* collector: refactor metric package to stats

Signed-off-by: Marcelo Amaral <[email protected]>

* collector: move prometheus collector to metrics package

Signed-off-by: Marcelo Amaral <[email protected]>

* model: calculate processes energy and aggregate per container or vm

Signed-off-by: Marcelo Amaral <[email protected]>

* libvirt: find vm id from cgroup path

Signed-off-by: Marcelo Amaral <[email protected]>

* sensors: rename power package to sensors

Signed-off-by: Marcelo Amaral <[email protected]>

* libbpf: rebuild

Signed-off-by: Marcelo Amaral <[email protected]>

* bcc: fix cgroup id

Signed-off-by: Marcelo Amaral <[email protected]>

* e2e: wait kepler start

Signed-off-by: Marcelo Amaral <[email protected]>

* code: rebase

Signed-off-by: Marcelo Amaral <[email protected]>

---------

Signed-off-by: Marcelo Amaral <[email protected]>
  • Loading branch information
marceloamaral authored Dec 19, 2023
1 parent 158305a commit 7c99198
Show file tree
Hide file tree
Showing 133 changed files with 3,606 additions and 5,736 deletions.
19 changes: 14 additions & 5 deletions .github/workflows/unit_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,26 @@ jobs:
env:
GOPATH: /home/runner/go
GOBIN: /home/runner/go/bin
- name: install libbpf
uses: sustainable-computing-io/[email protected]
with:
ebpfprovider: libbpf
- name: Prepare environment
run: |
sudo apt-get install -y cpuid
sudo apt-get install -y cpuid clang
cd doc/ && sudo ./dev/prepare_dev_env.sh && cd -
git config --global --add safe.directory /kepler
- name: install libbpf
uses: sustainable-computing-io/[email protected]
with:
ebpfprovider: libbpf
- name: Run
run: |
sudo apt remove libbpf-dev
mkdir temp-libbpf
cd temp-libbpf
git clone https://github.com/libbpf/libbpf
cd libbpf/src
sudo make install_headers
sudo make install_uapi_headers
sudo prefix=/usr BUILD_STATIC_ONLY=y make install
cd ../../../
ATTACHER_TAG=libbpf make test-verbose
go tool cover -func=coverage.out -o=coverage.out
- name: Go Coverage Badge # Pass the `coverage.out` output to this action
Expand Down
2 changes: 1 addition & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,4 @@ issues:
text: "could not import C"
- linters:
- dupl
text: ".*pkg/collector/metric/types/types_test.go.*" # false positive, https://github.com/mibk/dupl/issues/20
text: ".*pkg/collector/stats/types/types_test.go.*" # false positive, https://github.com/mibk/dupl/issues/20
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export TIMESTAMP ?=$(shell echo $(BIN_TIMESTAMP) | tr -d ':' | tr 'T' '-' | tr -
# restrict included verify-* targets to only process project files
SOURCE_GIT_TAG := $(shell git describe --tags --always --abbrev=7 --match 'v*')
SRC_ROOT := $(shell pwd)
ARCH := $(shell arch)
OUTPUT_DIR := _output
CROSS_BUILD_BINDIR := $(OUTPUT_DIR)/bin
GIT_VERSION := $(shell git describe --dirty --tags --always --match='v*')
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
<img align="right" width="250px" src="https://user-images.githubusercontent.com/17484350/138557170-d8079b94-a517-4366-ade8-8d473e3f3f1d.jpg">

![GitHub Workflow Status (event)](https://img.shields.io/github/actions/workflow/status/sustainable-computing-io/kepler/unit_test.yml?branch=main&label=CI)

![Coverage](https://img.shields.io/badge/Coverage-42.1%25-yellow)
[![OpenSSF Best Practices](https://bestpractices.coreinfrastructure.org/projects/7391/badge)](https://bestpractices.coreinfrastructure.org/projects/7391)
<!--
Expand Down
2 changes: 1 addition & 1 deletion bpfassets/bcc/bcc.c
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ int kprobe__finish_task_switch(struct pt_regs *ctx, struct task_struct *prev)
#ifdef SET_GROUP_ID
u64 cgroup_id = bpf_get_current_cgroup_id();
#else
u64 cgroup_id = 0;
u64 cgroup_id = task->cgroups->subsys[0]->cgroup->id;
#endif

u64 cur_ts = bpf_ktime_get_ns();
Expand Down
Binary file modified bpfassets/libbpf/bpf.o/amd64_kepler.bpf.o
Binary file not shown.
1 change: 1 addition & 0 deletions bpfassets/libbpf/src/kepler.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ int kepler_trace(struct sched_switch_args *ctx)

u32 cur_pid = bpf_get_current_pid_tgid();
u64 cgroup_id = bpf_get_current_cgroup_id(); // the cgroup id is the cgroup id of the running process (this is not next_pid or prev_pid)

u64 cur_ts = bpf_ktime_get_ns();
u32 cpu_id = bpf_get_smp_processor_id();
u32 prev_pid = ctx->prev_pid;
Expand Down
28 changes: 15 additions & 13 deletions cmd/exporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,16 @@ import (
"time"

"github.com/sustainable-computing-io/kepler/pkg/bpfassets/attacher"
collector_metric "github.com/sustainable-computing-io/kepler/pkg/collector/metric"
"github.com/sustainable-computing-io/kepler/pkg/collector/stats"
"github.com/sustainable-computing-io/kepler/pkg/config"
"github.com/sustainable-computing-io/kepler/pkg/manager"
"github.com/sustainable-computing-io/kepler/pkg/power/accelerator/gpu"
"github.com/sustainable-computing-io/kepler/pkg/power/accelerator/qat"
"github.com/sustainable-computing-io/kepler/pkg/power/components"
"github.com/sustainable-computing-io/kepler/pkg/power/platform"
"github.com/sustainable-computing-io/kepler/pkg/sensors/accelerator/gpu"
"github.com/sustainable-computing-io/kepler/pkg/sensors/accelerator/qat"
"github.com/sustainable-computing-io/kepler/pkg/sensors/components"
"github.com/sustainable-computing-io/kepler/pkg/sensors/platform"
kversion "github.com/sustainable-computing-io/kepler/pkg/version"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/common/version"

"k8s.io/klog/v2"
)
Expand Down Expand Up @@ -197,7 +195,7 @@ func main() {
components.InitPowerImpl()
platform.InitPowerImpl()

collector_metric.InitAvailableParamAndMetrics()
stats.InitAvailableParamAndMetrics()

if config.EnabledGPU {
klog.Infof("Initializing the GPU collector")
Expand All @@ -218,7 +216,7 @@ func main() {
}
}

if config.EnabledQAT {
if config.IsExposeQATMetricsEnabled() {
klog.Infof("Initializing the QAT collector")
err := qat.Init()
if err == nil {
Expand All @@ -229,9 +227,8 @@ func main() {
}

m := manager.New()
prometheus.MustRegister(version.NewCollector("kepler_exporter"))
prometheus.MustRegister(m.PrometheusCollector)
defer m.MetricCollector.Destroy()
reg := m.PrometheusCollector.RegisterMetrics()
defer m.StatsCollector.Destroy()
defer components.StopPower()

// starting a new goroutine to collect data and report metrics
Expand All @@ -242,7 +239,12 @@ func main() {
metricPathConfig := config.GetMetricPath(*metricsPath)
bindAddressConfig := config.GetBindAddress(*address)

http.Handle(metricPathConfig, promhttp.Handler())
http.Handle(metricPathConfig, promhttp.HandlerFor(
reg,
promhttp.HandlerOpts{
Registry: reg,
},
))
http.HandleFunc("/healthz", healthProbe)
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
_, err := w.Write([]byte(`<html>
Expand Down
6 changes: 3 additions & 3 deletions cmd/validator/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ import (
"time"

"github.com/jaypipes/ghw"
"github.com/sustainable-computing-io/kepler/pkg/power/components"
"github.com/sustainable-computing-io/kepler/pkg/power/components/source"
"github.com/sustainable-computing-io/kepler/pkg/power/platform"
"github.com/sustainable-computing-io/kepler/pkg/sensors/components"
"github.com/sustainable-computing-io/kepler/pkg/sensors/components/source"
"github.com/sustainable-computing-io/kepler/pkg/sensors/platform"
)

const (
Expand Down
3 changes: 1 addition & 2 deletions e2e/integration-test/e2e_metric_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ func getEnvOrDefault(envName, defaultValue string) string {
}

var _ = Describe("Metrics check should pass", Ordered, func() {

var keplerMetric *TestKeplerMetric

_ = BeforeAll(func() {
Expand Down Expand Up @@ -177,7 +178,6 @@ var _ = Describe("Metrics check should pass", Ordered, func() {
Entry(nil, "kepler_node_core_joules_total"), // node level check by instance
Entry(nil, "kepler_node_dram_joules_total"), // node level check by instance
Entry(nil, "kepler_node_info"), // node level missing labels
Entry(nil, "kepler_node_other_joules_total"), // node level check by instance
Entry(nil, "kepler_node_package_joules_total"), // node level check by instance
Entry(nil, "kepler_node_platform_joules_total"), // node level check by instance
Entry(nil, "kepler_node_uncore_joules_total"), // node level check by instance
Expand Down Expand Up @@ -214,7 +214,6 @@ var _ = Describe("Metrics check should pass", Ordered, func() {
Entry(nil, "kepler_container_core_joules_total"), // pod level
Entry(nil, "kepler_container_dram_joules_total"), // pod level
Entry(nil, "kepler_container_joules_total"), // pod level
Entry(nil, "kepler_container_other_joules_total"), // pod level
Entry(nil, "kepler_container_package_joules_total"), // pod level
Entry(nil, "kepler_container_uncore_joules_total"), // pod level
)
Expand Down
5 changes: 1 addition & 4 deletions hack/cluster-deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,7 @@ build_manifest() {
return 0
}
header "Build Kepler Manifest"
run make build-manifest \
OPTS="$OPTS" \
IMAGE_REPO="$IMAGE_REPO" \
IMAGE_TAG="$IMAGE_TAG"
run make build-manifest OPTS="$OPTS"
}

build_kepler() {
Expand Down
4 changes: 2 additions & 2 deletions pkg/bpfassets/attacher/bpf_perf.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ func getCounters() map[string]perfCounter {
func GetEnabledBPFHWCounters() []string {
Counters = getCounters()
var metrics []string
klog.V(5).Infof("hardeware counter metrics config %t", config.ExposeHardwareCounterMetrics)
if !config.ExposeHardwareCounterMetrics {
klog.V(5).Infof("hardeware counter metrics config %t", config.IsHCMetricsEnabled())
if !config.IsHCMetricsEnabled() {
klog.V(5).Info("hardeware counter metrics not enabled")
return metrics
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/bpfassets/attacher/libbpf_attacher_stub.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ const (
var libbpfCounters = map[string]perfCounter{}

// attachLibbpfModule is the stub variant of the libbpf attach routine.
// It never produces a module: when LibbpfBuilt is false it reports that
// the libbpf build tag is missing, and when LibbpfBuilt is true it returns
// a nil module together with a nil error.
func attachLibbpfModule() (interface{}, error) {
	if !LibbpfBuilt {
		return nil, fmt.Errorf("no libbpf build tag")
	}
	return nil, nil
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/cgroup/cgroup_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ limitations under the License.

package cgroup

import "github.com/sustainable-computing-io/kepler/pkg/collector/metric/types"
import "github.com/sustainable-computing-io/kepler/pkg/collector/stats/types"

type CCgroupStatHandler interface {
SetCGroupStat(containerID string, CgroupStatMap map[string]*types.UInt64StatCollection) error
Expand Down
2 changes: 1 addition & 1 deletion pkg/cgroup/cgroup_stats_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
"github.com/opencontainers/runc/libcontainer/configs"

"github.com/sustainable-computing-io/kepler/pkg/collector/metric/types"
"github.com/sustainable-computing-io/kepler/pkg/collector/stats/types"
"github.com/sustainable-computing-io/kepler/pkg/config"
)

Expand Down
26 changes: 14 additions & 12 deletions pkg/cgroup/resolve_container.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,23 +76,22 @@ func GetContainerID(cGroupID, pid uint64, withCGroupID bool) (string, error) {
return info.ContainerID, err
}

func GetContainerMetrics() (containerCPU, containerMem map[string]float64, retErr error) {
return podLister.ListMetrics()
}

func GetAvailableKubeletMetrics() []string {
return podLister.GetAvailableMetrics()
}

func GetContainerInfo(cGroupID, pid uint64, withCGroupID bool) (*ContainerInfo, error) {
var err error
var containerID string

name := utils.SystemProcessName
namespace := utils.SystemProcessNamespace
if cGroupID == 1 {
// some kernel processes have a cgroup id equal to 1 or 0
name = utils.KernelProcessName
namespace = utils.KernelProcessNamespace
}
info := &ContainerInfo{
ContainerID: utils.SystemProcessName,
ContainerName: utils.SystemProcessName,
PodName: utils.SystemProcessName,
Namespace: utils.SystemProcessNamespace,
ContainerID: name,
ContainerName: name,
PodName: name,
Namespace: namespace,
}

if containerID, err = getContainerIDFromPath(cGroupID, pid, withCGroupID); err != nil {
Expand All @@ -113,6 +112,9 @@ func ParseContainerIDFromPodStatus(containerID string) string {
}

func getContainerIDFromPath(cGroupID, pid uint64, withCGroupID bool) (string, error) {
if cGroupID == 1 {
return utils.KernelProcessName, nil
}
var err error
var containerID string
if withCGroupID {
Expand Down
85 changes: 0 additions & 85 deletions pkg/collector/container_accelerator_collector.go

This file was deleted.

26 changes: 0 additions & 26 deletions pkg/collector/container_energy_collector.go

This file was deleted.

Loading

0 comments on commit 7c99198

Please sign in to comment.