Skip to content

Commit

Permalink
Merge pull request #1718 from maryamtahhan/config-globals-cleanup
Browse files Browse the repository at this point in the history
chore: cleanup pkg/config globals
  • Loading branch information
sthaha authored Sep 2, 2024
2 parents 295ce9d + 8b821a2 commit 05af659
Show file tree
Hide file tree
Showing 38 changed files with 635 additions and 377 deletions.
8 changes: 4 additions & 4 deletions cmd/exporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@ func healthProbe(w http.ResponseWriter, req *http.Request) {
func main() {
start := time.Now()
klog.InitFlags(nil)
appConfig := newAppConfig()
flag.Parse()

appConfig := newAppConfig() // Initialize appConfig and define flags
flag.Parse() // Parse command-line flags
config.GetConfig() // Initialize the configuration
klog.Infof("Kepler running on version: %s", build.Version)

registry := metrics.GetRegistry()
Expand Down Expand Up @@ -149,7 +149,7 @@ func main() {

stats.InitAvailableParamAndMetrics()

if config.EnabledGPU {
if config.EnabledGPU() {
r := accelerator.GetRegistry()
if a, err := accelerator.New(accelerator.GPU, true); err == nil {
r.MustRegister(a) // Register the accelerator with the registry
Expand Down
6 changes: 3 additions & 3 deletions pkg/bpf/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func (e *exporter) attach() error {

// Set program global variables
err = specs.RewriteConstants(map[string]interface{}{
"SAMPLE_RATE": int32(config.BPFSampleRate),
"SAMPLE_RATE": int32(config.GetBPFSampleRate()),
})
if err != nil {
return fmt.Errorf("error rewriting program constants: %v", err)
Expand All @@ -112,7 +112,7 @@ func (e *exporter) attach() error {
}
e.enabledSoftwareCounters[config.CPUTime] = struct{}{}

if config.ExposeIRQCounterMetrics {
if config.ExposeIRQCounterMetrics() {
e.irqLink, err = link.AttachTracing(link.TracingOptions{
Program: e.bpfObjects.KeplerIrqTrace,
AttachType: ebpf.AttachTraceRawTp,
Expand Down Expand Up @@ -148,7 +148,7 @@ func (e *exporter) attach() error {
}

// Return early if hardware counters are not enabled
if !config.ExposeHardwareCounterMetrics {
if !config.ExposeHardwareCounterMetrics() {
klog.Infof("Hardware counter metrics are disabled")
return nil
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/bpf/test_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func defaultHardwareCounters() sets.Set[string] {

func defaultSoftwareCounters() sets.Set[string] {
swCounters := sets.New(config.CPUTime, config.PageCacheHit)
if config.ExposeIRQCounterMetrics {
if config.ExposeIRQCounterMetrics() {
swCounters.Insert(config.IRQNetTXLabel, config.IRQNetRXLabel, config.IRQBlockLabel)
}
return swCounters
Expand Down
2 changes: 1 addition & 1 deletion pkg/collector/energy/node_energy_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func UpdateNodeComponentsEnergy(nodeStats *stats.NodeStats, wg *sync.WaitGroup)
// UpdateNodeGPUEnergy updates each GPU power consumption. Right now we don't support other types of accelerators
func UpdateNodeGPUEnergy(nodeStats *stats.NodeStats, wg *sync.WaitGroup) {
defer wg.Done()
if config.EnabledGPU {
if config.EnabledGPU() {
if gpu := acc.GetRegistry().ActiveAcceleratorByType(acc.GPU); gpu != nil {
gpuEnergy := gpu.Device().AbsEnergyFromDevice()
for gpu, energy := range gpuEnergy {
Expand Down
10 changes: 5 additions & 5 deletions pkg/collector/metric_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ func (c *Collector) Initialize() error {
// model component decide whether/how to init
model.CreatePowerEstimatorModels(
stats.GetProcessFeatureNames(c.bpfSupportedMetrics),
stats.NodeMetadataFeatureNames,
stats.NodeMetadataFeatureValues,
stats.NodeMetadataFeatureNames(),
stats.NodeMetadataFeatureValues(),
c.bpfSupportedMetrics,
)

Expand Down Expand Up @@ -161,7 +161,7 @@ func (c *Collector) updateProcessResourceUtilizationMetrics(wg *sync.WaitGroup)
// update process metrics regarding the resource utilization to be used to calculate the energy consumption
// we first update the bpf, which is responsible for including new processes in the ProcessStats collection
resourceBpf.UpdateProcessBPFMetrics(c.bpfExporter, c.ProcessStats)
if config.EnabledGPU {
if config.EnabledGPU() {
if acc.GetRegistry().ActiveAcceleratorByType(acc.GPU) != nil {
accelerator.UpdateProcessGPUUtilizationMetrics(c.ProcessStats, c.bpfSupportedMetrics)
}
Expand All @@ -184,7 +184,7 @@ func (c *Collector) AggregateProcessResourceUtilizationMetrics() {
// aggregate metrics per container
if config.IsExposeContainerStatsEnabled() {
if process.ContainerID != "" {
c.createContainerStatsIfNotExist(process.ContainerID, process.CGroupID, process.PID, config.EnabledEBPFCgroupID)
c.createContainerStatsIfNotExist(process.ContainerID, process.CGroupID, process.PID, config.EnabledEBPFCgroupID())
c.ContainerStats[process.ContainerID].ResourceUsage[metricName].AddDeltaStat(id, delta)
foundContainer[process.ContainerID] = true
}
Expand Down Expand Up @@ -270,7 +270,7 @@ func (c *Collector) AggregateProcessEnergyUtilizationMetrics() {
// aggregate metrics per container
if config.IsExposeContainerStatsEnabled() {
if process.ContainerID != "" {
c.createContainerStatsIfNotExist(process.ContainerID, process.CGroupID, process.PID, config.EnabledEBPFCgroupID)
c.createContainerStatsIfNotExist(process.ContainerID, process.CGroupID, process.PID, config.EnabledEBPFCgroupID())
c.ContainerStats[process.ContainerID].EnergyUsage[metricName].AddDeltaStat(id, delta)
}
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/collector/metric_collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ var _ = Describe("Test Collector Unit", func() {
metricCollector := newMockCollector(bpfExporter)
// The default estimator model is the ratio
bpfSupportedMetrics := bpfExporter.SupportedMetrics()
model.CreatePowerEstimatorModels(stats.GetProcessFeatureNames(bpfSupportedMetrics), stats.NodeMetadataFeatureNames, stats.NodeMetadataFeatureValues, bpfSupportedMetrics)
model.CreatePowerEstimatorModels(stats.GetProcessFeatureNames(bpfSupportedMetrics), stats.NodeMetadataFeatureNames(), stats.NodeMetadataFeatureValues(), bpfSupportedMetrics)
// update container and node metrics
metricCollector.UpdateProcessEnergyUtilizationMetrics()
metricCollector.AggregateProcessEnergyUtilizationMetrics()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ func UpdateProcessBPFMetrics(bpfExporter bpf.Exporter, processStats map[uint64]*
}

// if the pid is within a container, it will have a container ID
containerID, err := cgroup.GetContainerID(ct.CgroupId, ct.Pid, config.EnabledEBPFCgroupID)
containerID, err := cgroup.GetContainerID(ct.CgroupId, ct.Pid, config.EnabledEBPFCgroupID())
if err != nil {
klog.V(6).Infof("failed to resolve container for PID %v (command=%s): %v, set containerID=%s", ct.Pid, comm, err, utils.SystemProcessName)
}
Expand All @@ -114,7 +114,7 @@ func UpdateProcessBPFMetrics(bpfExporter bpf.Exporter, processStats map[uint64]*
}

mapKey := ct.Pid
if ct.CgroupId == 1 && config.EnabledEBPFCgroupID {
if ct.CgroupId == 1 && config.EnabledEBPFCgroupID() {
// we aggregate all kernel processes to minimize overhead
// all kernel processes have cgroup id 1, and pid 1 is also a kernel process
mapKey = 1
Expand Down
2 changes: 1 addition & 1 deletion pkg/collector/stats/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func benchmarkNtesting(b *testing.B, processNumber int) {

// The default estimator model is the ratio
bpfSupportedMetrics := bpf.DefaultSupportedMetrics()
model.CreatePowerEstimatorModels(stats.GetProcessFeatureNames(bpfSupportedMetrics), stats.NodeMetadataFeatureNames, stats.NodeMetadataFeatureValues, bpfSupportedMetrics)
model.CreatePowerEstimatorModels(stats.GetProcessFeatureNames(bpfSupportedMetrics), stats.NodeMetadataFeatureNames(), stats.NodeMetadataFeatureValues(), bpfSupportedMetrics)

// update container and node metrics
b.ReportAllocs()
Expand Down
33 changes: 21 additions & 12 deletions pkg/collector/stats/node_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,33 @@ import (
"github.com/sustainable-computing-io/kepler/pkg/utils"
)

var (
NodeName = GetNodeName()
NodeCPUArchitecture = getCPUArch()
NodeCPUPackageMap = getCPUPackageMap()

// NodeMetricNames holds the name of the system metadata information.
NodeMetadataFeatureNames []string = []string{"cpu_architecture"}
// SystemMetadata holds the metadata regarding the system information
NodeMetadataFeatureValues []string = []string{NodeCPUArchitecture}
)

// NodeStats embeds Stats and adds the per-key resource utilization values
// used for idle detection.
type NodeStats struct {
Stats

// IdleResUtilization is used to determine idle periods
IdleResUtilization map[string]uint64
}

// NodeCPUArchitecture returns the CPU architecture.
// It delegates to getCPUArch on every call; this function itself does not
// store the result.
func NodeCPUArchitecture() string {
return getCPUArch()
}

// NodeCPUPackageMap returns the CPU package map.
// It delegates to getCPUPackageMap on every call; this function itself does
// not store the result.
// NOTE(review): the meaning of the int32 keys and string values is defined by
// getCPUPackageMap, which is not visible here — confirm before relying on it.
func NodeCPUPackageMap() map[int32]string {
return getCPUPackageMap()
}

// NodeMetadataFeatureNames returns the feature names for metadata.
// A new single-element slice containing "cpu_architecture" is built on each
// call.
func NodeMetadataFeatureNames() []string {
return []string{"cpu_architecture"}
}

// NodeMetadataFeatureValues returns the feature values for metadata.
// A new single-element slice holding the result of NodeCPUArchitecture is
// built on each call.
// NOTE(review): presumably paired index-by-index with the slice returned by
// NodeMetadataFeatureNames — confirm against the callers.
func NodeMetadataFeatureValues() []string {
return []string{NodeCPUArchitecture()}
}

func NewNodeStats(bpfSupportedMetrics bpf.SupportedMetrics) *NodeStats {
return &NodeStats{
Stats: *NewStats(bpfSupportedMetrics),
Expand All @@ -57,7 +66,7 @@ func (ne *NodeStats) ResetDeltaValues() {

func (ne *NodeStats) UpdateIdleEnergyWithMinValue(isComponentsSystemCollectionSupported bool) {
// gpu metric
if config.EnabledGPU {
if config.EnabledGPU() {
if acc.GetRegistry().ActiveAcceleratorByType(acc.GPU) != nil {
ne.CalcIdleEnergy(config.AbsEnergyInGPU, config.IdleEnergyInGPU, config.GPUComputeUtilization)
}
Expand Down
8 changes: 4 additions & 4 deletions pkg/collector/stats/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func NewStats(bpfSupportedMetrics bpf.SupportedMetrics) *Stats {
m.ResourceUsage[metricName] = types.NewUInt64StatCollection()
}

if config.EnabledGPU {
if config.EnabledGPU() {
if acc.GetRegistry().ActiveAcceleratorByType(acc.GPU) != nil {
m.ResourceUsage[config.GPUComputeUtilization] = types.NewUInt64StatCollection()
m.ResourceUsage[config.GPUMemUtilization] = types.NewUInt64StatCollection()
Expand Down Expand Up @@ -127,7 +127,7 @@ func (m *Stats) UpdateDynEnergy() {
m.CalcDynEnergy(config.AbsEnergyInPlatform, config.IdleEnergyInPlatform, config.DynEnergyInPlatform, sensorID)
}
// gpu metric
if config.EnabledGPU {
if config.EnabledGPU() {
if acc.GetRegistry().ActiveAcceleratorByType(acc.GPU) != nil {
for gpuID := range m.EnergyUsage[config.AbsEnergyInGPU] {
m.CalcDynEnergy(config.AbsEnergyInGPU, config.IdleEnergyInGPU, config.DynEnergyInGPU, gpuID)
Expand Down Expand Up @@ -162,7 +162,7 @@ func calcDynEnergy(totalE, idleE uint64) uint64 {

func normalize(val float64, shouldNormalize bool) float64 {
if shouldNormalize {
return val / float64(config.SamplePeriodSec)
return val / float64(config.SamplePeriodSec())
}
return val
}
Expand All @@ -181,7 +181,7 @@ func (m *Stats) ToEstimatorValues(featuresName []string, shouldNormalize bool) [
}
// some features are not related to resource utilization, such as power metrics
switch feature {
case config.GeneralUsageMetric: // is an empty string for UNCORE and OTHER resource usage
case config.GeneralUsageMetric(): // is an empty string for UNCORE and OTHER resource usage
featureValues = append(featureValues, 0)

case config.DynEnergyInPkg: // for dynamic PKG power consumption
Expand Down
3 changes: 2 additions & 1 deletion pkg/collector/stats/stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ import (

var _ = Describe("Stats", func() {
It("Test InitAvailableParamAndMetrics", func() {
config.ExposeHardwareCounterMetrics = false
config.GetConfig()
config.SetEnabledHardwareCounterMetrics(false)
supportedMetrics := bpf.DefaultSupportedMetrics()
InitAvailableParamAndMetrics()
exp := []string{}
Expand Down
4 changes: 1 addition & 3 deletions pkg/collector/stats/test_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const (
// SetMockedCollectorMetrics adds all metric to a process, otherwise it will not create the right usageMetric with all elements. The usageMetric is used in the Prediction Power Models
// TODO: do not use a fixed usageMetric array in the power models; structured data is more desirable.
func SetMockedCollectorMetrics() {
config.GetConfig()
if gpu := acc.GetRegistry().ActiveAcceleratorByType(acc.GPU); gpu != nil {
err := gpu.Device().Init() // create structure instances that will be accessed to create a processMetric
klog.Fatalln(err)
Expand All @@ -50,9 +51,6 @@ func SetMockedCollectorMetrics() {
config.IdleEnergyInCore, config.IdleEnergyInDRAM, config.IdleEnergyInUnCore, config.IdleEnergyInPkg,
config.IdleEnergyInGPU, config.IdleEnergyInOther, config.IdleEnergyInPlatform,
}

NodeMetadataFeatureNames = []string{"cpu_architecture"}
NodeMetadataFeatureValues = []string{"Sandy Bridge"}
}

// CreateMockedProcessStats adds two containers with all metrics initialized
Expand Down
6 changes: 3 additions & 3 deletions pkg/collector/stats/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func GetProcessFeatureNames(bpfSupportedMetrics bpf.SupportedMetrics) []string {
klog.V(3).Infof("Available ebpf counters: %v", metrics)

// gpu metric
if config.EnabledGPU {
if config.EnabledGPU() {
if acc.GetRegistry().ActiveAcceleratorByType(acc.GPU) != nil {
gpuMetrics := []string{config.GPUComputeUtilization, config.GPUMemUtilization}
metrics = append(metrics, gpuMetrics...)
Expand All @@ -92,7 +92,7 @@ func GetProcessFeatureNames(bpfSupportedMetrics bpf.SupportedMetrics) []string {
return metrics
}

func GetNodeName() string {
func NodeName() string {
if nodeName := os.Getenv("NODE_NAME"); nodeName != "" {
return nodeName
}
Expand Down Expand Up @@ -298,7 +298,7 @@ func getCPUPmuName() (pmuName string, err error) {

func getCPUArchitecture() (string, error) {
// check if there is a CPU architecture override
cpuArchOverride := config.CPUArchOverride
cpuArchOverride := config.CPUArchOverride()
if cpuArchOverride != "" {
klog.V(2).Infof("cpu arch override: %v\n", cpuArchOverride)
return cpuArchOverride, nil
Expand Down
Loading

0 comments on commit 05af659

Please sign in to comment.