Skip to content

Commit

Permalink
chore: cleanup stats.go
Browse files Browse the repository at this point in the history
Remove the globals in stats.go and tidy up the file.

Signed-off-by: Maryam Tahhan <[email protected]>
  • Loading branch information
maryamtahhan committed Aug 1, 2024
1 parent 6d72d65 commit d2e2cc2
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 127 deletions.
258 changes: 135 additions & 123 deletions pkg/collector/stats/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,228 +26,240 @@ import (
"k8s.io/klog/v2"
)

var (
// Default metric sets. In addition, each metric set is used by the model
// package to estimate different power usage metrics.
// AvailableAbsEnergyMetrics holds a list of absolute energy metrics
AvailableAbsEnergyMetrics = []string{
config.AbsEnergyInCore, config.AbsEnergyInDRAM, config.AbsEnergyInUnCore, config.AbsEnergyInPkg,
config.AbsEnergyInGPU, config.AbsEnergyInOther, config.AbsEnergyInPlatform,
}
// AvailableDynEnergyMetrics holds a list of dynamic energy metrics
AvailableDynEnergyMetrics = []string{
config.DynEnergyInCore, config.DynEnergyInDRAM, config.DynEnergyInUnCore, config.DynEnergyInPkg,
config.DynEnergyInGPU, config.DynEnergyInOther, config.DynEnergyInPlatform,
}
// AvailableIdleEnergyMetrics holds a list of idle energy metrics
AvailableIdleEnergyMetrics = []string{
config.IdleEnergyInCore, config.IdleEnergyInDRAM, config.IdleEnergyInUnCore, config.IdleEnergyInPkg,
config.IdleEnergyInGPU, config.IdleEnergyInOther, config.IdleEnergyInPlatform,
}
// AvailableBPFMetrics holds a list of reasonable default bpf metrics
AvailableBPFMetrics = []string{
config.CPUCycle, config.CPURefCycle, config.CPUInstruction, config.CacheMiss, config.CPUTime,
config.PageCacheHit, config.IRQNetTXLabel, config.IRQNetRXLabel, config.IRQBlockLabel,
}
)
var defaultBPFMetrics = []string{
config.CPUCycle, config.CPURefCycle, config.CPUInstruction, config.CacheMiss, config.CPUTime,
config.PageCacheHit, config.IRQNetTXLabel, config.IRQNetRXLabel, config.IRQBlockLabel,
}

// MetricSets stores different sets of metrics for energy and resource usage.
// Each field is a list of metric names; the names are the keys used in the
// EnergyUsage and ResourceUsage maps of Stats.
type MetricSets struct {
	// AbsEnergyMetrics lists the absolute energy metric names.
	AbsEnergyMetrics []string
	// DynEnergyMetrics lists the dynamic energy metric names.
	DynEnergyMetrics []string
	// IdleEnergyMetrics lists the idle energy metric names.
	IdleEnergyMetrics []string
	// BPFMetrics lists the BPF (resource usage) metric names.
	BPFMetrics []string
}

// Stats stores resource and energy usage statistics.
type Stats struct {
ResourceUsage map[string]types.UInt64StatCollection
EnergyUsage map[string]types.UInt64StatCollection
ResourceUsage map[string]types.UInt64StatCollection
EnergyUsage map[string]types.UInt64StatCollection
AvailableMetrics *MetricSets
}

// newMetricSets initializes and returns a new MetricSets instance populated
// with the absolute, dynamic, and idle energy metric names and the BPF metric
// names currently held in defaultBPFMetrics.
//
// The BPF metric names are copied rather than shared, so that modifying the
// returned MetricSets cannot alter the package-level defaultBPFMetrics slice
// or any other MetricSets built from it. The energy metric lists are fresh
// literals and are therefore already private to the returned value.
func newMetricSets() *MetricSets {
	return &MetricSets{
		AbsEnergyMetrics: []string{
			config.AbsEnergyInCore, config.AbsEnergyInDRAM, config.AbsEnergyInUnCore, config.AbsEnergyInPkg,
			config.AbsEnergyInGPU, config.AbsEnergyInOther, config.AbsEnergyInPlatform,
		},
		DynEnergyMetrics: []string{
			config.DynEnergyInCore, config.DynEnergyInDRAM, config.DynEnergyInUnCore, config.DynEnergyInPkg,
			config.DynEnergyInGPU, config.DynEnergyInOther, config.DynEnergyInPlatform,
		},
		IdleEnergyMetrics: []string{
			config.IdleEnergyInCore, config.IdleEnergyInDRAM, config.IdleEnergyInUnCore, config.IdleEnergyInPkg,
			config.IdleEnergyInGPU, config.IdleEnergyInOther, config.IdleEnergyInPlatform,
		},
		// Detach from the global's backing array to avoid slice aliasing.
		BPFMetrics: append([]string(nil), defaultBPFMetrics...),
	}
}

// NewStats creates a new Stats instance
// NewStats creates a new Stats instance.
func NewStats() *Stats {
m := &Stats{
ResourceUsage: make(map[string]types.UInt64StatCollection),
EnergyUsage: make(map[string]types.UInt64StatCollection),
stats := &Stats{
ResourceUsage: make(map[string]types.UInt64StatCollection),
EnergyUsage: make(map[string]types.UInt64StatCollection),
AvailableMetrics: newMetricSets(),
}

// initialize the energy metrics in the map
energyMetrics := []string{}
energyMetrics = append(energyMetrics, AvailableDynEnergyMetrics...)
energyMetrics = append(energyMetrics, AvailableAbsEnergyMetrics...)
energyMetrics = append(energyMetrics, AvailableIdleEnergyMetrics...)
// Initialize the energy metrics in the map.
energyMetrics := append([]string{}, stats.AvailableMetrics.AbsEnergyMetrics...)
energyMetrics = append(energyMetrics, stats.AvailableMetrics.DynEnergyMetrics...)
energyMetrics = append(energyMetrics, stats.AvailableMetrics.IdleEnergyMetrics...)
for _, metricName := range energyMetrics {
m.EnergyUsage[metricName] = types.NewUInt64StatCollection()
stats.EnergyUsage[metricName] = types.NewUInt64StatCollection()
}

// initialize the resource utilization metrics in the map
resMetrics := []string{}
resMetrics = append(resMetrics, AvailableBPFMetrics...)
for _, metricName := range resMetrics {
m.ResourceUsage[metricName] = types.NewUInt64StatCollection()
// Initialize the resource utilization metrics in the map.
for _, metricName := range stats.AvailableMetrics.BPFMetrics {
stats.ResourceUsage[metricName] = types.NewUInt64StatCollection()
}

if config.EnabledGPU {
if acc.GetRegistry().ActiveAcceleratorByType(acc.GPU) != nil {
m.ResourceUsage[config.GPUComputeUtilization] = types.NewUInt64StatCollection()
m.ResourceUsage[config.GPUMemUtilization] = types.NewUInt64StatCollection()
m.ResourceUsage[config.IdleEnergyInGPU] = types.NewUInt64StatCollection()
stats.ResourceUsage[config.GPUComputeUtilization] = types.NewUInt64StatCollection()
stats.ResourceUsage[config.GPUMemUtilization] = types.NewUInt64StatCollection()
stats.ResourceUsage[config.IdleEnergyInGPU] = types.NewUInt64StatCollection()
}
}

return m
return stats
}

// ResetDeltaValues reset all current value to 0
func (m *Stats) ResetDeltaValues() {
for _, stat := range m.ResourceUsage {
// ResetDeltaValues resets all current values to 0.
func (s *Stats) ResetDeltaValues() {
for _, stat := range s.ResourceUsage {
stat.ResetDeltaValues()
}
for metric, stat := range m.EnergyUsage {
for metric, stat := range s.EnergyUsage {
if strings.Contains(metric, "idle") {
continue // do not reset the idle power metrics
continue // Do not reset the idle power metrics.
}
stat.ResetDeltaValues()
}
}

func (m *Stats) String() string {
func (s *Stats) String() string {
return fmt.Sprintf(
"\tDyn ePkg (mJ): %s (eCore: %s eDram: %s eUncore: %s) eGPU (mJ): %s eOther (mJ): %s platform (mJ): %s \n"+
"\tIdle ePkg (mJ): %s (eCore: %s eDram: %s eUncore: %s) eGPU (mJ): %s eOther (mJ): %s platform (mJ): %s \n"+
"\tResUsage: %v\n",
m.EnergyUsage[config.DynEnergyInPkg],
m.EnergyUsage[config.DynEnergyInCore],
m.EnergyUsage[config.DynEnergyInDRAM],
m.EnergyUsage[config.DynEnergyInUnCore],
m.EnergyUsage[config.DynEnergyInGPU],
m.EnergyUsage[config.DynEnergyInOther],
m.EnergyUsage[config.DynEnergyInPlatform],
m.EnergyUsage[config.IdleEnergyInPkg],
m.EnergyUsage[config.IdleEnergyInCore],
m.EnergyUsage[config.IdleEnergyInDRAM],
m.EnergyUsage[config.IdleEnergyInUnCore],
m.EnergyUsage[config.IdleEnergyInGPU],
m.EnergyUsage[config.IdleEnergyInOther],
m.EnergyUsage[config.IdleEnergyInPlatform],
m.ResourceUsage)
s.EnergyUsage[config.DynEnergyInPkg],
s.EnergyUsage[config.DynEnergyInCore],
s.EnergyUsage[config.DynEnergyInDRAM],
s.EnergyUsage[config.DynEnergyInUnCore],
s.EnergyUsage[config.DynEnergyInGPU],
s.EnergyUsage[config.DynEnergyInOther],
s.EnergyUsage[config.DynEnergyInPlatform],
s.EnergyUsage[config.IdleEnergyInPkg],
s.EnergyUsage[config.IdleEnergyInCore],
s.EnergyUsage[config.IdleEnergyInDRAM],
s.EnergyUsage[config.IdleEnergyInUnCore],
s.EnergyUsage[config.IdleEnergyInGPU],
s.EnergyUsage[config.IdleEnergyInOther],
s.EnergyUsage[config.IdleEnergyInPlatform],
s.ResourceUsage,
)
}

// UpdateDynEnergy calculates the dynamic energy
func (m *Stats) UpdateDynEnergy() {
for pkgID := range m.EnergyUsage[config.AbsEnergyInPkg] {
m.CalcDynEnergy(config.AbsEnergyInPkg, config.IdleEnergyInPkg, config.DynEnergyInPkg, pkgID)
m.CalcDynEnergy(config.AbsEnergyInCore, config.IdleEnergyInCore, config.DynEnergyInCore, pkgID)
m.CalcDynEnergy(config.AbsEnergyInUnCore, config.IdleEnergyInUnCore, config.DynEnergyInUnCore, pkgID)
m.CalcDynEnergy(config.AbsEnergyInDRAM, config.IdleEnergyInDRAM, config.DynEnergyInDRAM, pkgID)
// UpdateDynEnergy calculates the dynamic energy.
func (s *Stats) UpdateDynEnergy() {
for pkgID := range s.EnergyUsage[config.AbsEnergyInPkg] {
s.CalcDynEnergy(config.AbsEnergyInPkg, config.IdleEnergyInPkg, config.DynEnergyInPkg, pkgID)
s.CalcDynEnergy(config.AbsEnergyInCore, config.IdleEnergyInCore, config.DynEnergyInCore, pkgID)
s.CalcDynEnergy(config.AbsEnergyInUnCore, config.IdleEnergyInUnCore, config.DynEnergyInUnCore, pkgID)
s.CalcDynEnergy(config.AbsEnergyInDRAM, config.IdleEnergyInDRAM, config.DynEnergyInDRAM, pkgID)
}
for sensorID := range m.EnergyUsage[config.AbsEnergyInPlatform] {
m.CalcDynEnergy(config.AbsEnergyInPlatform, config.IdleEnergyInPlatform, config.DynEnergyInPlatform, sensorID)
for sensorID := range s.EnergyUsage[config.AbsEnergyInPlatform] {
s.CalcDynEnergy(config.AbsEnergyInPlatform, config.IdleEnergyInPlatform, config.DynEnergyInPlatform, sensorID)
}
// gpu metric
// GPU metric
if config.EnabledGPU {
if acc.GetRegistry().ActiveAcceleratorByType(acc.GPU) != nil {
for gpuID := range m.EnergyUsage[config.AbsEnergyInGPU] {
m.CalcDynEnergy(config.AbsEnergyInGPU, config.IdleEnergyInGPU, config.DynEnergyInGPU, gpuID)
for gpuID := range s.EnergyUsage[config.AbsEnergyInGPU] {
s.CalcDynEnergy(config.AbsEnergyInGPU, config.IdleEnergyInGPU, config.DynEnergyInGPU, gpuID)
}
}
}
}

// CalcDynEnergy calculate the difference between the absolute and idle energy/power
func (m *Stats) CalcDynEnergy(absM, idleM, dynM, id string) {
if _, exist := m.EnergyUsage[absM][id]; !exist {
// CalcDynEnergy calculates the difference between the absolute and idle energy/power.
func (s *Stats) CalcDynEnergy(absM, idleM, dynM, id string) {
if _, exist := s.EnergyUsage[absM][id]; !exist {
return
}
totalPower := m.EnergyUsage[absM][id].GetDelta()
klog.V(6).Infof("Absolute Energy stat: %v (%s)", m.EnergyUsage[absM], id)
totalPower := s.EnergyUsage[absM][id].GetDelta()
klog.V(6).Infof("Absolute Energy stat: %v (%s)", s.EnergyUsage[absM], id)
idlePower := uint64(0)
if idleStat, found := m.EnergyUsage[idleM][id]; found {
if idleStat, found := s.EnergyUsage[idleM][id]; found {
idlePower = idleStat.GetDelta()
klog.V(6).Infof("Idle Energy stat: %v (%s)", m.EnergyUsage[idleM], id)
klog.V(6).Infof("Idle Energy stat: %v (%s)", s.EnergyUsage[idleM], id)
}
dynPower := calcDynEnergy(totalPower, idlePower)
m.EnergyUsage[dynM].SetDeltaStat(id, dynPower)
klog.V(6).Infof("Dynamic Energy stat: %v (%s)", m.EnergyUsage[dynM], id)
s.EnergyUsage[dynM].SetDeltaStat(id, dynPower)
klog.V(6).Infof("Dynamic Energy stat: %v (%s)", s.EnergyUsage[dynM], id)
}

// calcDynEnergy returns the dynamic energy, i.e. the total energy minus the
// idle energy. It returns 0 when either input is 0 or when the idle energy
// exceeds the total, so the unsigned subtraction can never wrap around.
func calcDynEnergy(totalE, idleE uint64) uint64 {
	switch {
	case totalE == 0, idleE == 0, totalE < idleE:
		return 0
	default:
		return totalE - idleE
	}
}

// normalize returns val unchanged when shouldNormalize is false; otherwise it
// returns val divided by config.SamplePeriodSec.
func normalize(val float64, shouldNormalize bool) float64 {
	if !shouldNormalize {
		return val
	}
	period := float64(config.SamplePeriodSec)
	return val / period
}

// ToEstimatorValues return values regarding metricNames.
// ToEstimatorValues returns values for the specified metric names, normalized if required.
// The metrics can be related to resource utilization or power consumption.
// Since Kepler collects metrics at intervals of SamplePeriodSec, which is greater than 1 second, and the power models are trained to estimate power in 1 second interval,
// it is necessary to normalize the resource utilization by the SamplePeriodSec. Note that this is important because the power curve can be different for higher or lower resource usage within 1 second interval.
func (m *Stats) ToEstimatorValues(featuresName []string, shouldNormalize bool) []float64 {
// Since Kepler collects metrics at intervals of SamplePeriodSec, which is greater than 1 second,
// and the power models are trained to estimate power over a 1-second interval, it is necessary to
// normalize the resource utilization by SamplePeriodSec. This is important because the power
// curve can differ for higher or lower resource usage within a 1-second interval.
func (s *Stats) ToEstimatorValues(featuresName []string, shouldNormalize bool) []float64 {
featureValues := []float64{}
for _, feature := range featuresName {
// verify all metrics that are part of the node resource usage metrics
if value, exists := m.ResourceUsage[feature]; exists {
// Verify all metrics that are part of the node resource usage metrics.
if value, exists := s.ResourceUsage[feature]; exists {
featureValues = append(featureValues, normalize(float64(value.SumAllDeltaValues()), shouldNormalize))
continue
}
// some features are not related to resource utilization, such as power metrics
// Some features are not related to resource utilization, such as power metrics.
switch feature {
case config.GeneralUsageMetric: // is an empty string for UNCORE and OTHER resource usage
case config.GeneralUsageMetric: // Is an empty string for UNCORE and OTHER resource usage.
featureValues = append(featureValues, 0)

case config.DynEnergyInPkg: // for dynamic PKG power consumption
value := normalize(float64(m.EnergyUsage[config.DynEnergyInPkg].SumAllDeltaValues()), shouldNormalize)
case config.DynEnergyInPkg: // For dynamic PKG power consumption.
value := normalize(float64(s.EnergyUsage[config.DynEnergyInPkg].SumAllDeltaValues()), shouldNormalize)
featureValues = append(featureValues, value)

case config.DynEnergyInCore: // for dynamic CORE power consumption
value := normalize(float64(m.EnergyUsage[config.DynEnergyInCore].SumAllDeltaValues()), shouldNormalize)
case config.DynEnergyInCore: // For dynamic CORE power consumption.
value := normalize(float64(s.EnergyUsage[config.DynEnergyInCore].SumAllDeltaValues()), shouldNormalize)
featureValues = append(featureValues, value)

case config.DynEnergyInDRAM: // for dynamic PKG power consumption
value := normalize(float64(m.EnergyUsage[config.DynEnergyInDRAM].SumAllDeltaValues()), shouldNormalize)
case config.DynEnergyInDRAM: // For dynamic DRAM power consumption.
value := normalize(float64(s.EnergyUsage[config.DynEnergyInDRAM].SumAllDeltaValues()), shouldNormalize)
featureValues = append(featureValues, value)

case config.DynEnergyInUnCore: // for dynamic UNCORE power consumption
value := normalize(float64(m.EnergyUsage[config.DynEnergyInUnCore].SumAllDeltaValues()), shouldNormalize)
case config.DynEnergyInUnCore: // For dynamic UNCORE power consumption.
value := normalize(float64(s.EnergyUsage[config.DynEnergyInUnCore].SumAllDeltaValues()), shouldNormalize)
featureValues = append(featureValues, value)

case config.DynEnergyInOther: // for dynamic OTHER power consumption
value := normalize(float64(m.EnergyUsage[config.DynEnergyInOther].SumAllDeltaValues()), shouldNormalize)
case config.DynEnergyInOther: // For dynamic OTHER power consumption.
value := normalize(float64(s.EnergyUsage[config.DynEnergyInOther].SumAllDeltaValues()), shouldNormalize)
featureValues = append(featureValues, value)

case config.DynEnergyInPlatform: // for dynamic PLATFORM power consumption
value := normalize(float64(m.EnergyUsage[config.DynEnergyInPlatform].SumAllDeltaValues()), shouldNormalize)
case config.DynEnergyInPlatform: // For dynamic PLATFORM power consumption.
value := normalize(float64(s.EnergyUsage[config.DynEnergyInPlatform].SumAllDeltaValues()), shouldNormalize)
featureValues = append(featureValues, value)

case config.DynEnergyInGPU: // for dynamic GPU power consumption
value := normalize(float64(m.EnergyUsage[config.DynEnergyInGPU].SumAllDeltaValues()), shouldNormalize)
case config.DynEnergyInGPU: // For dynamic GPU power consumption.
value := normalize(float64(s.EnergyUsage[config.DynEnergyInGPU].SumAllDeltaValues()), shouldNormalize)
featureValues = append(featureValues, value)

case config.IdleEnergyInPkg: // for idle PKG power consumption
value := normalize(float64(m.EnergyUsage[config.IdleEnergyInPkg].SumAllDeltaValues()), shouldNormalize)
case config.IdleEnergyInPkg: // For idle PKG power consumption.
value := normalize(float64(s.EnergyUsage[config.IdleEnergyInPkg].SumAllDeltaValues()), shouldNormalize)
featureValues = append(featureValues, value)

case config.IdleEnergyInCore: // for idle CORE power consumption
value := normalize(float64(m.EnergyUsage[config.IdleEnergyInCore].SumAllDeltaValues()), shouldNormalize)
case config.IdleEnergyInCore: // For idle CORE power consumption.
value := normalize(float64(s.EnergyUsage[config.IdleEnergyInCore].SumAllDeltaValues()), shouldNormalize)
featureValues = append(featureValues, value)

case config.IdleEnergyInDRAM: // for idle PKG power consumption
value := normalize(float64(m.EnergyUsage[config.IdleEnergyInDRAM].SumAllDeltaValues()), shouldNormalize)
case config.IdleEnergyInDRAM: // For idle DRAM power consumption.
value := normalize(float64(s.EnergyUsage[config.IdleEnergyInDRAM].SumAllDeltaValues()), shouldNormalize)
featureValues = append(featureValues, value)

case config.IdleEnergyInUnCore: // for idle UNCORE power consumption
value := normalize(float64(m.EnergyUsage[config.IdleEnergyInUnCore].SumAllDeltaValues()), shouldNormalize)
case config.IdleEnergyInUnCore: // For idle UNCORE power consumption.
value := normalize(float64(s.EnergyUsage[config.IdleEnergyInUnCore].SumAllDeltaValues()), shouldNormalize)
featureValues = append(featureValues, value)

case config.IdleEnergyInOther: // for idle OTHER power consumption
value := normalize(float64(m.EnergyUsage[config.IdleEnergyInOther].SumAllDeltaValues()), shouldNormalize)
case config.IdleEnergyInOther: // For idle OTHER power consumption.
value := normalize(float64(s.EnergyUsage[config.IdleEnergyInOther].SumAllDeltaValues()), shouldNormalize)
featureValues = append(featureValues, value)

case config.IdleEnergyInPlatform: // for idle PLATFORM power consumption
value := normalize(float64(m.EnergyUsage[config.IdleEnergyInPlatform].SumAllDeltaValues()), shouldNormalize)
case config.IdleEnergyInPlatform: // For idle PLATFORM power consumption.
value := normalize(float64(s.EnergyUsage[config.IdleEnergyInPlatform].SumAllDeltaValues()), shouldNormalize)
featureValues = append(featureValues, value)

case config.IdleEnergyInGPU: // for idle GPU power consumption
value := normalize(float64(m.EnergyUsage[config.IdleEnergyInGPU].SumAllDeltaValues()), shouldNormalize)
case config.IdleEnergyInGPU: // For idle GPU power consumption.
value := normalize(float64(s.EnergyUsage[config.IdleEnergyInGPU].SumAllDeltaValues()), shouldNormalize)
featureValues = append(featureValues, value)

default:
Expand Down
4 changes: 2 additions & 2 deletions pkg/collector/stats/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,13 @@ type CPUS struct {
}

func RegisterBPFStats(counters []string) {
AvailableBPFMetrics = counters
defaultBPFMetrics = counters
}

func GetProcessFeatureNames() []string {
var metrics []string
// bpf counter metrics
metrics = append(metrics, AvailableBPFMetrics...)
metrics = append(metrics, defaultBPFMetrics...)
klog.V(3).Infof("Available ebpf counters: %v", metrics)

// gpu metric
Expand Down
4 changes: 2 additions & 2 deletions pkg/model/node_component_energy.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ func GetNodeComponentPowers(nodeMetrics *stats.NodeStats, isIdlePower bool) (nod

// UpdateNodeComponentEnergy sets the power model samples, gets the absolute powers, and sets the gauge value for each component's energy.
func UpdateNodeComponentEnergy(nodeMetrics *stats.NodeStats) {
addEnergy(nodeMetrics, stats.AvailableAbsEnergyMetrics, absPower)
addEnergy(nodeMetrics, nodeMetrics.AvailableMetrics.AbsEnergyMetrics, absPower)
}

// UpdateNodeComponentIdleEnergy sets the power model samples to zeros, gets the idle powers, and sets the gauge value for each component's idle energy.
func UpdateNodeComponentIdleEnergy(nodeMetrics *stats.NodeStats) {
addEnergy(nodeMetrics, stats.AvailableIdleEnergyMetrics, idlePower)
addEnergy(nodeMetrics, nodeMetrics.AvailableMetrics.IdleEnergyMetrics, idlePower)
}

func addEnergy(nodeMetrics *stats.NodeStats, metrics []string, isIdle bool) {
Expand Down

0 comments on commit d2e2cc2

Please sign in to comment.