Skip to content

Commit

Permalink
Add a Sample Rate config (#942)
Browse files Browse the repository at this point in the history
* Add a sample rate config

Signed-off-by: Michael Mercier <[email protected]>

* Fix: use static PATH in the container build

Signed-off-by: Michael Mercier <[email protected]>

* Fix comments

Signed-off-by: Michael Mercier <[email protected]>

* Fix lint error and cast sampleRate to uint64

Signed-off-by: Michael Mercier <[email protected]>

---------

Signed-off-by: Michael Mercier <[email protected]>
  • Loading branch information
mickours authored Sep 21, 2023
1 parent 5272ee2 commit 8db29a4
Show file tree
Hide file tree
Showing 6 changed files with 12 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ container_build:
$(CTR_CMD) run --rm \
--network host \
-v $(base_dir):/kepler:Z -w /kepler \
-e GOROOT=/usr/local/go -e PATH=$(PATH):/usr/local/go/bin \
-e GOROOT=/usr/local/go -e PATH=/usr/bin:/bin:/sbin:/usr/local/bin:/usr/local/go/bin \
$(BUILDER_IMAGE) \
git config --global --add safe.directory /kepler && make build

Expand Down
6 changes: 4 additions & 2 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ const (
// MaxIRQ is the maximum number of IRQs to be monitored
MaxIRQ = 10

// SamplePeriodSec is the time in seconds that the reader will wait before reading the metrics again
SamplePeriodSec = 3
// defaultSamplePeriodSec is the default time in seconds that the reader will wait before reading the metrics again; it is used when SAMPLE_PERIOD_SEC is not set
defaultSamplePeriodSec = 3
)

var (
Expand Down Expand Up @@ -93,6 +93,8 @@ var (
GpuUsageMetric = getConfig("GPU_USAGE_METRIC", GPUSMUtilization) // no metric (evenly divided)
GeneralUsageMetric = getConfig("GENERAL_USAGE_METRIC", defaultMetricValue) // for uncategorized energy

SamplePeriodSec = uint64(getIntConfig("SAMPLE_PERIOD_SEC", defaultSamplePeriodSec))

versionRegex = regexp.MustCompile(`^(\d+)\.(\d+).`)

configPath = "/etc/kepler/kepler.config"
Expand Down
6 changes: 3 additions & 3 deletions pkg/manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ import (
"github.com/sustainable-computing-io/kepler/pkg/kubernetes"
)

const (
samplePeriod = config.SamplePeriodSec * 1000 * time.Millisecond
var (
samplePeriod = time.Duration(config.SamplePeriodSec * 1000 * uint64(time.Millisecond))
)

type CollectorManager struct {
Expand All @@ -48,7 +48,7 @@ func New() *CollectorManager {
manager.PrometheusCollector.ContainersMetrics = &manager.MetricCollector.ContainersMetrics
manager.PrometheusCollector.ProcessMetrics = &manager.MetricCollector.ProcessMetrics
manager.PrometheusCollector.VMMetrics = &manager.MetricCollector.VMMetrics
manager.PrometheusCollector.SamplePeriodSec = config.SamplePeriodSec
manager.PrometheusCollector.SamplePeriodSec = float64(config.SamplePeriodSec)
// configure the watcher
manager.Watcher = kubernetes.NewObjListWatcher()
manager.Watcher.Mx = &manager.PrometheusCollector.Mx
Expand Down
4 changes: 2 additions & 2 deletions pkg/model/node_platform_energy.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,13 @@ func UpdateNodePlatformEnergy(nodeMetrics *collector_metric.NodeMetrics) {
platformPower := GetNodePlatformPower(nodeMetrics, absPower)
for id, power := range platformPower {
// convert power to energy
platformPower[id] = power * config.SamplePeriodSec
platformPower[id] = power * float64(config.SamplePeriodSec)
}
nodeMetrics.SetNodePlatformEnergy(platformPower, gauge, absPower)
platformPower = GetNodePlatformPower(nodeMetrics, idlePower)
for id, power := range platformPower {
// convert power to energy
platformPower[id] = power * config.SamplePeriodSec
platformPower[id] = power * float64(config.SamplePeriodSec)
}
nodeMetrics.SetNodePlatformEnergy(platformPower, gauge, idlePower)
}
2 changes: 1 addition & 1 deletion pkg/power/accelerator/gpu/source/gpu_nvml.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func (n *GPUNvml) GetAbsEnergyFromGPU() []uint32 {
}
// since Kepler collects metrics at intervals of SamplePeriodSec, which is greater than 1 second, it is
// necessary to calculate the energy consumption for the entire waiting period
energy := uint32(float64(power) * config.SamplePeriodSec)
energy := uint32(uint64(power) * config.SamplePeriodSec)
gpuEnergy = append(gpuEnergy, energy)
}
return gpuEnergy
Expand Down
2 changes: 1 addition & 1 deletion pkg/power/platform/source/acpi.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ func (a *ACPI) GetAbsEnergyFromPlatform() (map[string]float64, error) {
if err == nil {
// since Kepler collects metrics at intervals of SamplePeriodSec, which is greater than 1 second, it is
// necessary to calculate the energy consumption for the entire waiting period
power[sensorIDPrefix+strconv.Itoa(int(i))] = float64(currPower) / 1000 * config.SamplePeriodSec /*miliJoules*/
power[sensorIDPrefix+strconv.Itoa(int(i))] = float64(currPower / 1000 * config.SamplePeriodSec) /*miliJoules*/
} else {
return power, err
}
Expand Down

0 comments on commit 8db29a4

Please sign in to comment.