diff --git a/pkg/bpf/exporter.go b/pkg/bpf/exporter.go index 14643370c5..5967e20f9a 100644 --- a/pkg/bpf/exporter.go +++ b/pkg/bpf/exporter.go @@ -50,8 +50,8 @@ type exporter struct { func NewExporter() (Exporter, error) { e := &exporter{ - enabledHardwareCounters: sets.New[string](), - enabledSoftwareCounters: sets.New[string](), + enabledHardwareCounters: sets.New[string](config.BPFHwCounters()...), + enabledSoftwareCounters: sets.New[string](config.BPFSwCounters()...), } err := e.attach() if err != nil { @@ -110,7 +110,6 @@ func (e *exporter) attach() error { if err != nil { return fmt.Errorf("error attaching sched_switch tracepoint: %v", err) } - e.enabledSoftwareCounters[config.CPUTime] = struct{}{} if config.ExposeIRQCounterMetrics() { e.irqLink, err = link.AttachTracing(link.TracingOptions{ @@ -120,9 +119,6 @@ func (e *exporter) attach() error { if err != nil { return fmt.Errorf("could not attach irq/softirq_entry: %w", err) } - e.enabledSoftwareCounters[config.IRQNetTXLabel] = struct{}{} - e.enabledSoftwareCounters[config.IRQNetRXLabel] = struct{}{} - e.enabledSoftwareCounters[config.IRQBlockLabel] = struct{}{} } group := "writeback" @@ -143,8 +139,6 @@ func (e *exporter) attach() error { }) if err != nil { klog.Warningf("failed to attach fentry/mark_page_accessed: %v. Kepler will not collect page cache read events. This will affect the DRAM power model estimation on VMs.", err) - } else if !e.enabledSoftwareCounters.Has(config.PageCacheHit) { - e.enabledSoftwareCounters[config.PageCacheHit] = struct{}{} } // Return early if hardware counters are not enabled @@ -162,9 +156,6 @@ func (e *exporter) attach() error { if err != nil { return nil } - e.enabledHardwareCounters[config.CPUCycle] = struct{}{} - e.enabledHardwareCounters[config.CPUInstruction] = struct{}{} - e.enabledHardwareCounters[config.CacheMiss] = struct{}{} return nil } diff --git a/pkg/bpf/test_utils.go b/pkg/bpf/test_utils.go index 4575f90a33..09e78246cb 100644 --- a/pkg/bpf/test_utils.go +++ b/pkg/bpf/test_utils.go @@ -18,14 +18,11 @@ func DefaultSupportedMetrics() SupportedMetrics { } func defaultHardwareCounters() sets.Set[string] { - return sets.New(config.CPUCycle, config.CPUInstruction, config.CacheMiss) + return sets.New(config.BPFHwCounters()...) } func defaultSoftwareCounters() sets.Set[string] { - swCounters := sets.New(config.CPUTime, config.PageCacheHit) - if config.ExposeIRQCounterMetrics() { - swCounters.Insert(config.IRQNetTXLabel, config.IRQNetRXLabel, config.IRQBlockLabel) - } + swCounters := sets.New(config.BPFSwCounters()...) return swCounters } diff --git a/pkg/collector/metric_collector.go b/pkg/collector/metric_collector.go index 895b62337c..aa030c9ec9 100644 --- a/pkg/collector/metric_collector.go +++ b/pkg/collector/metric_collector.go @@ -64,7 +64,7 @@ type Collector struct { func NewCollector(bpfExporter bpf.Exporter) *Collector { bpfSupportedMetrics := bpfExporter.SupportedMetrics() c := &Collector{ - NodeStats: *stats.NewNodeStats(bpfSupportedMetrics), + NodeStats: *stats.NewNodeStats(), ContainerStats: map[string]*stats.ContainerStats{}, ProcessStats: map[uint64]*stats.ProcessStats{}, VMStats: map[string]*stats.VMStats{}, @@ -78,8 +78,7 @@ func (c *Collector) Initialize() error { // For local estimator, there is endpoint provided, thus we should let // model component decide whether/how to init model.CreatePowerEstimatorModels( - stats.GetProcessFeatureNames(c.bpfSupportedMetrics), - c.bpfSupportedMetrics, + stats.GetProcessFeatureNames(), ) return nil @@ -161,7 +160,7 @@ func (c *Collector) updateProcessResourceUtilizationMetrics(wg *sync.WaitGroup) resourceBpf.UpdateProcessBPFMetrics(c.bpfExporter, c.ProcessStats) if config.EnabledGPU() { if acc.GetRegistry().ActiveAcceleratorByType(acc.GPU) != nil { - accelerator.UpdateProcessGPUUtilizationMetrics(c.ProcessStats, c.bpfSupportedMetrics) + accelerator.UpdateProcessGPUUtilizationMetrics(c.ProcessStats) } } } @@ -192,7 +191,7 @@ func (c *Collector) AggregateProcessResourceUtilizationMetrics() { if config.IsExposeVMStatsEnabled() { if process.VMID != "" { if _, ok := c.VMStats[process.VMID]; !ok { - c.VMStats[process.VMID] = stats.NewVMStats(process.PID, process.VMID, c.bpfSupportedMetrics) + c.VMStats[process.VMID] = stats.NewVMStats(process.PID, process.VMID) } c.VMStats[process.VMID].ResourceUsage[metricName].AddDeltaStat(id, delta) foundVM[process.VMID] = true @@ -277,7 +276,7 @@ func (c *Collector) AggregateProcessEnergyUtilizationMetrics() { if config.IsExposeVMStatsEnabled() { if process.VMID != "" { if _, ok := c.VMStats[process.VMID]; !ok { - c.VMStats[process.VMID] = stats.NewVMStats(process.PID, process.VMID, c.bpfSupportedMetrics) + c.VMStats[process.VMID] = stats.NewVMStats(process.PID, process.VMID) } c.VMStats[process.VMID].EnergyUsage[metricName].AddDeltaStat(id, delta) } diff --git a/pkg/collector/metric_collector_test.go b/pkg/collector/metric_collector_test.go index 30f101ebf4..7280af9a4e 100644 --- a/pkg/collector/metric_collector_test.go +++ b/pkg/collector/metric_collector_test.go @@ -44,8 +44,7 @@ var _ = Describe("Test Collector Unit", func() { bpfExporter := bpf.NewMockExporter(bpf.DefaultSupportedMetrics()) metricCollector := newMockCollector(bpfExporter) // The default estimator model is the ratio - bpfSupportedMetrics := bpfExporter.SupportedMetrics() - model.CreatePowerEstimatorModels(stats.GetProcessFeatureNames(bpfSupportedMetrics), bpfSupportedMetrics) + model.CreatePowerEstimatorModels(stats.GetProcessFeatureNames()) // update container and node metrics metricCollector.UpdateProcessEnergyUtilizationMetrics() metricCollector.AggregateProcessEnergyUtilizationMetrics() diff --git a/pkg/collector/resourceutilization/accelerator/process_gpu_collector.go b/pkg/collector/resourceutilization/accelerator/process_gpu_collector.go index 3e216df5bc..79d0de133a 100644 --- a/pkg/collector/resourceutilization/accelerator/process_gpu_collector.go +++ b/pkg/collector/resourceutilization/accelerator/process_gpu_collector.go @@ -21,7 +21,6 @@ import ( "os" "time" - "github.com/sustainable-computing-io/kepler/pkg/bpf" "github.com/sustainable-computing-io/kepler/pkg/cgroup" "github.com/sustainable-computing-io/kepler/pkg/collector/stats" "github.com/sustainable-computing-io/kepler/pkg/config" @@ -43,7 +42,7 @@ var ( ) // UpdateProcessGPUUtilizationMetrics reads the GPU metrics of each process using the GPU -func UpdateProcessGPUUtilizationMetrics(processStats map[uint64]*stats.ProcessStats, bpfSupportedMetrics bpf.SupportedMetrics) { +func UpdateProcessGPUUtilizationMetrics(processStats map[uint64]*stats.ProcessStats) { if gpu := acc.GetRegistry().ActiveAcceleratorByType(acc.GPU); gpu != nil { d := gpu.Device() migDevices := d.DeviceInstances() @@ -54,17 +53,17 @@ func UpdateProcessGPUUtilizationMetrics(processStats map[uint64]*stats.ProcessSt for _, migDevice := range migDevices[_device.(dev.GPUDevice).ID] { // device.ID is equal to migDevice.ParentID // we add the process metrics with the parent GPU ID, so that the Ratio power model will use this data to split the GPU power among the process - addGPUUtilizationToProcessStats(d, processStats, migDevice.(dev.GPUDevice), migDevice.(dev.GPUDevice).ParentID, bpfSupportedMetrics) + addGPUUtilizationToProcessStats(d, processStats, migDevice.(dev.GPUDevice), migDevice.(dev.GPUDevice).ParentID) } } else { - addGPUUtilizationToProcessStats(d, processStats, _device.(dev.GPUDevice), _device.(dev.GPUDevice).ID, bpfSupportedMetrics) + addGPUUtilizationToProcessStats(d, processStats, _device.(dev.GPUDevice), _device.(dev.GPUDevice).ID) } } } lastUtilizationTimestamp = time.Now() } -func addGPUUtilizationToProcessStats(ai dev.Device, processStats map[uint64]*stats.ProcessStats, d dev.GPUDevice, gpuID int, bpfSupportedMetrics bpf.SupportedMetrics) { +func addGPUUtilizationToProcessStats(ai dev.Device, processStats map[uint64]*stats.ProcessStats, d dev.GPUDevice, gpuID int) { var err error var processesUtilization map[uint32]any @@ -97,7 +96,7 @@ func addGPUUtilizationToProcessStats(ai dev.Device, processStats map[uint64]*sta } } } - processStats[uintPid] = stats.NewProcessStats(uintPid, uint64(0), containerID, vmID, command, bpfSupportedMetrics) + processStats[uintPid] = stats.NewProcessStats(uintPid, uint64(0), containerID, vmID, command) } gpuName := fmt.Sprintf("%d", gpuID) // GPU ID or Parent GPU ID for MIG slices processStats[uintPid].ResourceUsage[config.GPUComputeUtilization].AddDeltaStat(gpuName, uint64(processUtilization.(dev.GPUProcessUtilizationSample).ComputeUtil)) diff --git a/pkg/collector/resourceutilization/bpf/process_bpf_collector.go b/pkg/collector/resourceutilization/bpf/process_bpf_collector.go index ff6347cc41..ef25a47abe 100644 --- a/pkg/collector/resourceutilization/bpf/process_bpf_collector.go +++ b/pkg/collector/resourceutilization/bpf/process_bpf_collector.go @@ -124,7 +124,7 @@ func UpdateProcessBPFMetrics(bpfExporter bpf.Exporter, processStats map[uint64]* var ok bool var pStat *stats.ProcessStats if pStat, ok = processStats[mapKey]; !ok { - pStat = stats.NewProcessStats(ct.Pid, ct.CgroupId, containerID, vmID, comm, bpfSupportedMetrics) + pStat = stats.NewProcessStats(ct.Pid, ct.CgroupId, containerID, vmID, comm) processStats[mapKey] = pStat } else if pStat.Command == "" { pStat.Command = comm diff --git a/pkg/collector/stats/benchmark_test.go b/pkg/collector/stats/benchmark_test.go index df0482e830..ff87b6ff07 100644 --- a/pkg/collector/stats/benchmark_test.go +++ b/pkg/collector/stats/benchmark_test.go @@ -38,8 +38,7 @@ func benchmarkNtesting(b *testing.B, processNumber int) { metricCollector.AggregateProcessResourceUtilizationMetrics() // The default estimator model is the ratio - bpfSupportedMetrics := bpf.DefaultSupportedMetrics() - model.CreatePowerEstimatorModels(stats.GetProcessFeatureNames(bpfSupportedMetrics), bpfSupportedMetrics) + model.CreatePowerEstimatorModels(stats.GetProcessFeatureNames()) // update container and node metrics b.ReportAllocs() diff --git a/pkg/collector/stats/container_stats.go b/pkg/collector/stats/container_stats.go index 90525aa6f6..5bbc1f26da 100644 --- a/pkg/collector/stats/container_stats.go +++ b/pkg/collector/stats/container_stats.go @@ -18,8 +18,6 @@ package stats import ( "fmt" - - "github.com/sustainable-computing-io/kepler/pkg/bpf" ) type ContainerStats struct { @@ -33,9 +31,9 @@ type ContainerStats struct { } // NewContainerStats creates a new ContainerStats instance -func NewContainerStats(containerName, podName, podNamespace, containerID string, bpfSupportedMetrics bpf.SupportedMetrics) *ContainerStats { +func NewContainerStats(containerName, podName, podNamespace, containerID string) *ContainerStats { c := &ContainerStats{ - Stats: *NewStats(bpfSupportedMetrics), + Stats: *NewStats(), PIDS: make(map[uint64]bool), ContainerID: containerID, PodName: podName, diff --git a/pkg/collector/stats/container_stats_test.go b/pkg/collector/stats/container_stats_test.go index 6ad0f108ce..a031b1a0e1 100644 --- a/pkg/collector/stats/container_stats_test.go +++ b/pkg/collector/stats/container_stats_test.go @@ -19,7 +19,6 @@ package stats import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - "github.com/sustainable-computing-io/kepler/pkg/bpf" "github.com/sustainable-computing-io/kepler/pkg/config" ) @@ -27,7 +26,7 @@ var _ = Describe("Test Container Metric", func() { It("Test ResetDeltaValues", func() { SetMockedCollectorMetrics() - c := NewContainerStats("containerA", "podA", "test", "containerIDA", bpf.DefaultSupportedMetrics()) + c := NewContainerStats("containerA", "podA", "test", "containerIDA") c.ResourceUsage[config.CPUCycle].SetDeltaStat(MockedSocketID, 30000) c.ResourceUsage[config.CPUInstruction].SetDeltaStat(MockedSocketID, 30000) c.ResourceUsage[config.CacheMiss].SetDeltaStat(MockedSocketID, 30000) diff --git a/pkg/collector/stats/node_stats.go b/pkg/collector/stats/node_stats.go index 5b125bd8ee..d50ed1fa21 100644 --- a/pkg/collector/stats/node_stats.go +++ b/pkg/collector/stats/node_stats.go @@ -19,7 +19,6 @@ package stats import ( "fmt" - "github.com/sustainable-computing-io/kepler/pkg/bpf" "github.com/sustainable-computing-io/kepler/pkg/config" "github.com/sustainable-computing-io/kepler/pkg/node" acc "github.com/sustainable-computing-io/kepler/pkg/sensors/accelerator" @@ -36,9 +35,9 @@ type NodeStats struct { nodeInfo node.Node } -func NewNodeStats(bpfSupportedMetrics bpf.SupportedMetrics) *NodeStats { +func NewNodeStats() *NodeStats { return &NodeStats{ - Stats: *NewStats(bpfSupportedMetrics), + Stats: *NewStats(), IdleResUtilization: map[string]uint64{}, nodeInfo: node.NewNodeInfo(), } diff --git a/pkg/collector/stats/process_stats.go b/pkg/collector/stats/process_stats.go index ef575eccac..76c74dbf56 100644 --- a/pkg/collector/stats/process_stats.go +++ b/pkg/collector/stats/process_stats.go @@ -18,8 +18,6 @@ package stats import ( "fmt" - - "github.com/sustainable-computing-io/kepler/pkg/bpf" ) type ProcessStats struct { @@ -33,14 +31,14 @@ type ProcessStats struct { } // NewProcessStats creates a new ProcessStats instance -func NewProcessStats(pid, cGroupID uint64, containerID, vmID, command string, bpfSupportedMetrics bpf.SupportedMetrics) *ProcessStats { +func NewProcessStats(pid, cGroupID uint64, containerID, vmID, command string) *ProcessStats { p := &ProcessStats{ PID: pid, CGroupID: cGroupID, ContainerID: containerID, VMID: vmID, Command: command, - Stats: *NewStats(bpfSupportedMetrics), + Stats: *NewStats(), } return p } diff --git a/pkg/collector/stats/stats.go b/pkg/collector/stats/stats.go index 733fab92d8..d9aebbc92c 100644 --- a/pkg/collector/stats/stats.go +++ b/pkg/collector/stats/stats.go @@ -20,17 +20,32 @@ import ( "fmt" "strings" - "github.com/sustainable-computing-io/kepler/pkg/bpf" "github.com/sustainable-computing-io/kepler/pkg/collector/stats/types" "github.com/sustainable-computing-io/kepler/pkg/config" acc "github.com/sustainable-computing-io/kepler/pkg/sensors/accelerator" "k8s.io/klog/v2" ) -var defaultBPFMetrics = []string{ - config.CPUCycle, config.CPURefCycle, config.CPUInstruction, config.CacheMiss, config.CPUTime, - config.PageCacheHit, config.IRQNetTXLabel, config.IRQNetRXLabel, config.IRQBlockLabel, -} +var ( + // Default metric sets. In addition, each metric set is used by the model + // package to estimate different power usage metrics. + // AvailableAbsEnergyMetrics holds a list of absolute energy metrics + AvailableAbsEnergyMetrics = []string{ + config.AbsEnergyInCore, config.AbsEnergyInDRAM, config.AbsEnergyInUnCore, config.AbsEnergyInPkg, + config.AbsEnergyInGPU, config.AbsEnergyInOther, config.AbsEnergyInPlatform, + } + // AvailableDynEnergyMetrics holds a list of dynamic energy metrics + AvailableDynEnergyMetrics = []string{ + config.DynEnergyInCore, config.DynEnergyInDRAM, config.DynEnergyInUnCore, config.DynEnergyInPkg, + config.DynEnergyInGPU, config.DynEnergyInOther, config.DynEnergyInPlatform, + } + // AvailableIdleEnergyMetrics holds a list of idle energy metrics + AvailableIdleEnergyMetrics = []string{ + config.IdleEnergyInCore, config.IdleEnergyInDRAM, config.IdleEnergyInUnCore, config.IdleEnergyInPkg, + config.IdleEnergyInGPU, config.IdleEnergyInOther, config.IdleEnergyInPlatform, + } + AvailableBPFMetrics = append(config.BPFHwCounters(), config.BPFSwCounters()...) +) // metricSets stores different sets of metrics for energy and resource usage. type metricSets struct { @@ -40,7 +55,6 @@ type metricSets struct { bpfMetrics []string } -// Stats stores resource and energy usage statistics. type Stats struct { ResourceUsage map[string]types.UInt64StatCollection EnergyUsage map[string]types.UInt64StatCollection @@ -62,12 +76,12 @@ func newMetricSets() *metricSets { config.IdleEnergyInCore, config.IdleEnergyInDRAM, config.IdleEnergyInUnCore, config.IdleEnergyInPkg, config.IdleEnergyInGPU, config.IdleEnergyInOther, config.IdleEnergyInPlatform, }, - bpfMetrics: defaultBPFMetrics, + bpfMetrics: AvailableBPFMetrics, } } // NewStats creates a new Stats instance -func NewStats(bpfSupportedMetrics bpf.SupportedMetrics) *Stats { +func NewStats() *Stats { stats := &Stats{ ResourceUsage: make(map[string]types.UInt64StatCollection), EnergyUsage: make(map[string]types.UInt64StatCollection), @@ -82,8 +96,9 @@ func NewStats(bpfSupportedMetrics bpf.SupportedMetrics) *Stats { stats.EnergyUsage[metricName] = types.NewUInt64StatCollection() } - // Initialize the resource utilization metrics in the map. - for _, metricName := range stats.BPFMetrics() { + // initialize the resource utilization metrics in the map + resMetrics := append([]string{}, AvailableBPFMetrics...) + for _, metricName := range resMetrics { stats.ResourceUsage[metricName] = types.NewUInt64StatCollection() } diff --git a/pkg/collector/stats/stats_test.go b/pkg/collector/stats/stats_test.go index 2546272f8a..6ed6e7fd69 100644 --- a/pkg/collector/stats/stats_test.go +++ b/pkg/collector/stats/stats_test.go @@ -3,7 +3,6 @@ package stats import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - "github.com/sustainable-computing-io/kepler/pkg/bpf" "github.com/sustainable-computing-io/kepler/pkg/config" ) @@ -11,8 +10,7 @@ var _ = Describe("Stats", func() { It("Test InitAvailableParamAndMetrics", func() { config.GetConfig() config.SetEnabledHardwareCounterMetrics(false) - supportedMetrics := bpf.DefaultSupportedMetrics() exp := []string{} - Expect(len(GetProcessFeatureNames(supportedMetrics)) >= len(exp)).To(BeTrue()) + Expect(len(GetProcessFeatureNames()) >= len(exp)).To(BeTrue()) }) }) diff --git a/pkg/collector/stats/test_utils.go b/pkg/collector/stats/test_utils.go index 83f95b3c27..ddb623d442 100644 --- a/pkg/collector/stats/test_utils.go +++ b/pkg/collector/stats/test_utils.go @@ -19,7 +19,6 @@ package stats import ( "strconv" - "github.com/sustainable-computing-io/kepler/pkg/bpf" "github.com/sustainable-computing-io/kepler/pkg/config" acc "github.com/sustainable-computing-io/kepler/pkg/sensors/accelerator" "k8s.io/klog/v2" @@ -54,7 +53,7 @@ func createMockedProcessMetric(idx int) *ProcessStats { vmID := "vm" + strconv.Itoa(idx) command := "command" + strconv.Itoa(idx) uintPid := uint64(idx) - processMetrics := NewProcessStats(uintPid, uintPid, containerID, vmID, command, bpf.DefaultSupportedMetrics()) + processMetrics := NewProcessStats(uintPid, uintPid, containerID, vmID, command) // counter - attacher package processMetrics.ResourceUsage[config.CPUCycle].SetDeltaStat(MockedSocketID, 30000) processMetrics.ResourceUsage[config.CPUInstruction].SetDeltaStat(MockedSocketID, 30000) @@ -66,7 +65,7 @@ func createMockedProcessMetric(idx int) *ProcessStats { // CreateMockedNodeStats creates a node metric with power consumption and add the process resource utilization func CreateMockedNodeStats() NodeStats { - nodeMetrics := NewNodeStats(bpf.DefaultSupportedMetrics()) + nodeMetrics := NewNodeStats() // add power metrics // add first values to be the idle power nodeMetrics.EnergyUsage[config.AbsEnergyInPkg].SetDeltaStat(MockedSocketID, 5000) // mili joules diff --git a/pkg/collector/stats/utils.go b/pkg/collector/stats/utils.go index 0fa2d962db..67b6fc28ac 100644 --- a/pkg/collector/stats/utils.go +++ b/pkg/collector/stats/utils.go @@ -19,21 +19,14 @@ package stats import ( "k8s.io/klog/v2" - "github.com/sustainable-computing-io/kepler/pkg/bpf" "github.com/sustainable-computing-io/kepler/pkg/config" acc "github.com/sustainable-computing-io/kepler/pkg/sensors/accelerator" ) -func GetProcessFeatureNames(bpfSupportedMetrics bpf.SupportedMetrics) []string { +func GetProcessFeatureNames() []string { var metrics []string - // bpf software counter metrics - for counterKey := range bpfSupportedMetrics.SoftwareCounters { - metrics = append(metrics, counterKey) - } - // bpf hardware counter metrics - for counterKey := range bpfSupportedMetrics.HardwareCounters { - metrics = append(metrics, counterKey) - } + // bpf counter metrics + metrics = append(metrics, AvailableBPFMetrics...) klog.V(3).Infof("Available ebpf counters: %v", metrics) // gpu metric diff --git a/pkg/collector/stats/vm_stats.go b/pkg/collector/stats/vm_stats.go index 2da8c53865..64080f8ef1 100644 --- a/pkg/collector/stats/vm_stats.go +++ b/pkg/collector/stats/vm_stats.go @@ -16,8 +16,6 @@ limitations under the License. package stats -import "github.com/sustainable-computing-io/kepler/pkg/bpf" - type VMStats struct { Stats PID uint64 @@ -25,11 +23,11 @@ type VMStats struct { } // NewVMStats creates a new VMStats instance -func NewVMStats(pid uint64, vmID string, bpfSupportedMetrics bpf.SupportedMetrics) *VMStats { +func NewVMStats(pid uint64, vmID string) *VMStats { vm := &VMStats{ PID: pid, VMID: vmID, - Stats: *NewStats(bpfSupportedMetrics), + Stats: *NewStats(), } return vm } diff --git a/pkg/collector/stats/vm_stats_test.go b/pkg/collector/stats/vm_stats_test.go index 1ba5ad2ddc..67e7482496 100644 --- a/pkg/collector/stats/vm_stats_test.go +++ b/pkg/collector/stats/vm_stats_test.go @@ -3,7 +3,6 @@ package stats import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - "github.com/sustainable-computing-io/kepler/pkg/bpf" "github.com/sustainable-computing-io/kepler/pkg/config" ) @@ -11,7 +10,7 @@ var _ = Describe("VMMetric", func() { It("Test ResetDeltaValues", func() { SetMockedCollectorMetrics() - vm := NewVMStats(0, "name", bpf.DefaultSupportedMetrics()) + vm := NewVMStats(0, "name") vm.ResourceUsage[config.CPUTime].AddDeltaStat("socket0", 30000) vm.ResetDeltaValues() Expect(vm.ResourceUsage[config.CPUTime].SumAllDeltaValues()).To(Equal(uint64(0))) diff --git a/pkg/collector/utils.go b/pkg/collector/utils.go index 18feced185..4c9850ac48 100644 --- a/pkg/collector/utils.go +++ b/pkg/collector/utils.go @@ -36,7 +36,7 @@ func (c *Collector) createContainerStatsIfNotExist(containerID string, cGroupID, if !kubernetes.IsWatcherEnabled { info, _ := cgroup.GetContainerInfo(cGroupID, pid, withCGroupID) c.ContainerStats[containerID] = stats.NewContainerStats( - info.ContainerName, info.PodName, info.Namespace, containerID, c.bpfSupportedMetrics) + info.ContainerName, info.PodName, info.Namespace, containerID) } else { name := utils.SystemProcessName namespace := utils.SystemProcessNamespace @@ -47,7 +47,7 @@ func (c *Collector) createContainerStatsIfNotExist(containerID string, cGroupID, } // We feel the info with generic values because the watcher will eventually update it. c.ContainerStats[containerID] = stats.NewContainerStats( - name, name, namespace, containerID, c.bpfSupportedMetrics) + name, name, namespace, containerID) } } else { // TODO set only the most resource intensive PID for the container diff --git a/pkg/config/config.go b/pkg/config/config.go index d02a7e4296..4ef6475e07 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -668,3 +668,11 @@ func APIServerEnabled() bool { ensureConfigInitialized() return instance.Kepler.EnableAPIServer } + +func BPFHwCounters() []string { + return []string{CPUCycle, CPUInstruction, CacheMiss, CPURefCycle} +} + +func BPFSwCounters() []string { + return []string{CPUTime, IRQNetTXLabel, IRQNetRXLabel, IRQBlockLabel, PageCacheHit} +} diff --git a/pkg/kubernetes/watcher.go b/pkg/kubernetes/watcher.go index ad7e5023da..a8c9a6b769 100644 --- a/pkg/kubernetes/watcher.go +++ b/pkg/kubernetes/watcher.go @@ -286,7 +286,7 @@ func (w *ObjListWatcher) fillInfo(pod *corev1.Pod, containers []corev1.Container continue } if _, exist = w.ContainerStats[containerID]; !exist { - w.ContainerStats[containerID] = stats.NewContainerStats(containers[j].Name, pod.Name, pod.Namespace, containerID, w.bpfSupportedMetrics) + w.ContainerStats[containerID] = stats.NewContainerStats(containers[j].Name, pod.Name, pod.Namespace, containerID) } klog.V(5).Infof("receiving container %s %s %s %s", containers[j].Name, pod.Name, pod.Namespace, containerID) w.ContainerStats[containerID].ContainerName = containers[j].Name diff --git a/pkg/metrics/prometheus_collector_test.go b/pkg/metrics/prometheus_collector_test.go index dd6a547b1a..abc277d19f 100644 --- a/pkg/metrics/prometheus_collector_test.go +++ b/pkg/metrics/prometheus_collector_test.go @@ -88,7 +88,7 @@ var _ = Describe("Test Prometheus Collector Unit", func() { nodeStats.UpdateDynEnergy() - model.CreatePowerEstimatorModels(stats.GetProcessFeatureNames(bpfSupportedMetrics), bpfSupportedMetrics) + model.CreatePowerEstimatorModels(stats.GetProcessFeatureNames()) model.UpdateProcessEnergy(processStats, &nodeStats) // get metrics from prometheus diff --git a/pkg/model/benchmark_test.go b/pkg/model/benchmark_test.go index 74c81e68bd..64212f102b 100644 --- a/pkg/model/benchmark_test.go +++ b/pkg/model/benchmark_test.go @@ -43,8 +43,7 @@ func benchmarkNtesting(b *testing.B, processNumber int) { metricCollector.AggregateProcessResourceUtilizationMetrics() // The default estimator model is the ratio - bpfSupportedMetrics := bpf.DefaultSupportedMetrics() - model.CreatePowerEstimatorModels(stats.GetProcessFeatureNames(bpfSupportedMetrics), bpfSupportedMetrics) + model.CreatePowerEstimatorModels(stats.GetProcessFeatureNames()) // update container and node metrics b.ReportAllocs() diff --git a/pkg/model/model.go b/pkg/model/model.go index 417a951106..99ba5524f2 100644 --- a/pkg/model/model.go +++ b/pkg/model/model.go @@ -20,7 +20,6 @@ import ( "fmt" "strings" - "github.com/sustainable-computing-io/kepler/pkg/bpf" "github.com/sustainable-computing-io/kepler/pkg/config" "github.com/sustainable-computing-io/kepler/pkg/model/estimator/local" "github.com/sustainable-computing-io/kepler/pkg/model/estimator/local/regressor" @@ -71,9 +70,9 @@ type PowerModelInterface interface { } // CreatePowerEstimatorModels checks validity of power model and set estimate functions -func CreatePowerEstimatorModels(processFeatureNames []string, bpfSupportedMetrics bpf.SupportedMetrics) { +func CreatePowerEstimatorModels(processFeatureNames []string) { config.InitModelConfigMap() - CreateProcessPowerEstimatorModel(processFeatureNames, bpfSupportedMetrics) + CreateProcessPowerEstimatorModel(processFeatureNames) // Node power estimator uses the process features to estimate node power, expect for the Ratio power model that contains additional metrics. CreateNodePlatformPoweEstimatorModel(processFeatureNames) CreateNodeComponentPowerEstimatorModel(processFeatureNames) diff --git a/pkg/model/process_energy.go b/pkg/model/process_energy.go index 32b22cfa3d..824016c367 100644 --- a/pkg/model/process_energy.go +++ b/pkg/model/process_energy.go @@ -36,9 +36,10 @@ var ( ) // createProcessPowerModelConfig: the process component power model must be set by default. -func createProcessPowerModelConfig(powerSourceTarget string, processFeatureNames []string, energySource string, bpfSupportedMetrics bpf.SupportedMetrics) (modelConfig *types.ModelConfig) { +func createProcessPowerModelConfig(powerSourceTarget string, processFeatureNames []string, energySource string) (modelConfig *types.ModelConfig) { systemMetaDataFeatureNames := node.MetadataFeatureNames() systemMetaDataFeatureValues := node.MetadataFeatureValues() + bpfSupportedMetrics := bpf.DefaultSupportedMetrics() modelConfig = CreatePowerModelConfig(powerSourceTarget) if modelConfig == nil { return nil @@ -108,13 +109,13 @@ func createProcessPowerModelConfig(powerSourceTarget string, processFeatureNames return modelConfig } -func CreateProcessPowerEstimatorModel(processFeatureNames []string, bpfSupportedMetrics bpf.SupportedMetrics) { +func CreateProcessPowerEstimatorModel(processFeatureNames []string) { keys := map[string]string{ config.ProcessPlatformPowerKey(): types.PlatformEnergySource, config.ProcessComponentsPowerKey(): types.ComponentEnergySource, } for k, v := range keys { - modelConfig := createProcessPowerModelConfig(k, processFeatureNames, v, bpfSupportedMetrics) + modelConfig := createProcessPowerModelConfig(k, processFeatureNames, v) modelConfig.IsNodePowerModel = false m, err := createPowerModelEstimator(modelConfig) switch k { diff --git a/pkg/model/process_energy_test.go b/pkg/model/process_energy_test.go index f6e355274a..38ceb299b2 100644 --- a/pkg/model/process_energy_test.go +++ b/pkg/model/process_energy_test.go @@ -21,7 +21,6 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - "github.com/sustainable-computing-io/kepler/pkg/bpf" "github.com/sustainable-computing-io/kepler/pkg/collector/stats" "github.com/sustainable-computing-io/kepler/pkg/config" "github.com/sustainable-computing-io/kepler/pkg/sensors/components" @@ -65,8 +64,7 @@ var _ = Describe("ProcessPower", func() { os.Setenv("MODEL_CONFIG", configStr) // getEstimatorMetrics - bpfSupportedMetrics := bpf.DefaultSupportedMetrics() - CreatePowerEstimatorModels(stats.GetProcessFeatureNames(bpfSupportedMetrics), bpfSupportedMetrics) + CreatePowerEstimatorModels(stats.GetProcessFeatureNames()) // initialize the node energy with aggregated energy, which will be used to calculate delta energy // add first values to be the idle power @@ -101,8 +99,7 @@ var _ = Describe("ProcessPower", func() { os.Setenv("MODEL_CONFIG", configStr) // getEstimatorMetrics - bpfSupportedMetrics := bpf.DefaultSupportedMetrics() - CreatePowerEstimatorModels(stats.GetProcessFeatureNames(bpfSupportedMetrics), bpfSupportedMetrics) + CreatePowerEstimatorModels(stats.GetProcessFeatureNames()) // initialize the node energy with aggregated energy, which will be used to calculate delta energy // add first values to be the idle power