From dea318e431cf40a789ad47d45415469044de85d7 Mon Sep 17 00:00:00 2001 From: Sunil Thaha Date: Mon, 18 Nov 2024 16:14:06 +1000 Subject: [PATCH] feat(config): allow config dir to be passed as argument This commit allows the previously hard-coded default config directory to be passed as an argument. This allows different configurations to be stored separately and makes it quick to switch between them (especially during development). The commit also * simplifies global config initialization by ensuring it is initialised at the time kepler's main function is executed and fails with an error if that step fails. * cleans up the use of the config object to read CGroup info by creating a `realSystem` struct that handles this functionality. Signed-off-by: Sunil Thaha --- cmd/exporter/exporter.go | 38 +++--- pkg/collector/metric_collector_test.go | 6 + pkg/collector/stats/benchmark_test.go | 2 + pkg/collector/stats/container_stats_test.go | 5 + pkg/collector/stats/node_stats_test.go | 3 + pkg/collector/stats/process_stats_test.go | 5 + pkg/collector/stats/stats_test.go | 4 +- pkg/collector/stats/test_utils.go | 1 - pkg/config/config.go | 114 ++++++------------ pkg/config/config_test.go | 3 +- pkg/config/types.go | 3 +- pkg/libvirt/resolve_vm_test.go | 3 +- pkg/manager/manager_test.go | 5 +- pkg/model/benchmark_test.go | 2 + pkg/model/estimator/local/ratio_model_test.go | 4 + .../local/regressor/exponential_test.go | 6 + .../estimator/local/regressor/linear_test.go | 5 + .../local/regressor/logarithm_test.go | 6 + .../local/regressor/logistic_test.go | 6 + .../local/regressor/regressor_test.go | 7 +- pkg/model/process_energy_test.go | 2 + pkg/sensors/accelerator/accelerator_test.go | 19 +++ pkg/sensors/platform/source/redfish_test.go | 5 +- pkg/sensors/power_suite_test.go | 4 + 24 files changed, 156 insertions(+), 102 deletions(-) diff --git a/cmd/exporter/exporter.go b/cmd/exporter/exporter.go index 2687a2f13f..35efa6da23 100644 --- a/cmd/exporter/exporter.go +++ 
b/cmd/exporter/exporter.go @@ -53,6 +53,7 @@ const ( // AppConfig holds the configuration info for the application. type AppConfig struct { + BaseDir string Address string MetricsPath string EnableGPU bool @@ -69,21 +70,22 @@ type AppConfig struct { func newAppConfig() *AppConfig { // Initialize flags - _config := &AppConfig{} - flag.StringVar(&_config.Address, "address", "0.0.0.0:8888", "bind address") - flag.StringVar(&_config.MetricsPath, "metrics-path", "/metrics", "metrics path") - flag.BoolVar(&_config.EnableGPU, "enable-gpu", false, "whether enable gpu (need to have libnvidia-ml installed)") - flag.BoolVar(&_config.EnableEBPFCgroupID, "enable-cgroup-id", true, "whether enable eBPF to collect cgroup id") - flag.BoolVar(&_config.ExposeHardwareCounterMetrics, "expose-hardware-counter-metrics", true, "whether expose hardware counter as prometheus metrics") - flag.BoolVar(&_config.EnableMSR, "enable-msr", false, "whether MSR is allowed to obtain energy data") - flag.StringVar(&_config.Kubeconfig, "kubeconfig", "", "absolute path to the kubeconfig file, if empty we use the in-cluster configuration") - flag.BoolVar(&_config.ApiserverEnabled, "apiserver", true, "if apiserver is disabled, we collect pod information from kubelet") - flag.StringVar(&_config.RedfishCredFilePath, "redfish-cred-file-path", "", "path to the redfish credential file") - flag.BoolVar(&_config.ExposeEstimatedIdlePower, "expose-estimated-idle-power", false, "Whether to expose the estimated idle power as a metric") - flag.StringVar(&_config.MachineSpecFilePath, "machine-spec", "", "path to the machine spec file in json format") - flag.BoolVar(&_config.DisablePowerMeter, "disable-power-meter", false, "whether manually disable power meter read and forcefully apply the estimator for node powers") - - return _config + cfg := &AppConfig{} + flag.StringVar(&cfg.BaseDir, "config-dir", config.BaseDir, "path to config base directory") + flag.StringVar(&cfg.Address, "address", "0.0.0.0:8888", "bind 
address") + flag.StringVar(&cfg.MetricsPath, "metrics-path", "/metrics", "metrics path") + flag.BoolVar(&cfg.EnableGPU, "enable-gpu", false, "whether enable gpu (need to have libnvidia-ml installed)") + flag.BoolVar(&cfg.EnableEBPFCgroupID, "enable-cgroup-id", true, "whether enable eBPF to collect cgroup id") + flag.BoolVar(&cfg.ExposeHardwareCounterMetrics, "expose-hardware-counter-metrics", true, "whether expose hardware counter as prometheus metrics") + flag.BoolVar(&cfg.EnableMSR, "enable-msr", false, "whether MSR is allowed to obtain energy data") + flag.StringVar(&cfg.Kubeconfig, "kubeconfig", "", "absolute path to the kubeconfig file, if empty we use the in-cluster configuration") + flag.BoolVar(&cfg.ApiserverEnabled, "apiserver", true, "if apiserver is disabled, we collect pod information from kubelet") + flag.StringVar(&cfg.RedfishCredFilePath, "redfish-cred-file-path", "", "path to the redfish credential file") + flag.BoolVar(&cfg.ExposeEstimatedIdlePower, "expose-estimated-idle-power", false, "Whether to expose the estimated idle power as a metric") + flag.StringVar(&cfg.MachineSpecFilePath, "machine-spec", "", "path to the machine spec file in json format") + flag.BoolVar(&cfg.DisablePowerMeter, "disable-power-meter", false, "whether manually disable power meter read and forcefully apply the estimator for node powers") + + return cfg } func healthProbe(w http.ResponseWriter, req *http.Request) { @@ -99,7 +101,11 @@ func main() { klog.InitFlags(nil) appConfig := newAppConfig() // Initialize appConfig and define flags flag.Parse() // Parse command-line flags - config.GetConfig() // Initialize the configuration + + if _, err := config.Initialize(appConfig.BaseDir); err != nil { + klog.Fatalf("Failed to initialize config: %v", err) + } + klog.Infof("Kepler running on version: %s", build.Version) registry := metrics.GetRegistry() diff --git a/pkg/collector/metric_collector_test.go b/pkg/collector/metric_collector_test.go index e2c6e973a7..00c069ff80 100644 
--- a/pkg/collector/metric_collector_test.go +++ b/pkg/collector/metric_collector_test.go @@ -40,7 +40,13 @@ func newMockCollector(mockAttacher bpf.Exporter) *Collector { var _ = Describe("Test Collector Unit", func() { + BeforeEach(func() { + _, err := config.Initialize(".") + Expect(err).NotTo(HaveOccurred()) + }) + It("Get container power", func() { + bpfExporter := bpf.NewMockExporter(bpf.DefaultSupportedMetrics()) metricCollector := newMockCollector(bpfExporter) // The default estimator model is the ratio diff --git a/pkg/collector/stats/benchmark_test.go b/pkg/collector/stats/benchmark_test.go index ff87b6ff07..8210f3cce8 100644 --- a/pkg/collector/stats/benchmark_test.go +++ b/pkg/collector/stats/benchmark_test.go @@ -21,10 +21,12 @@ import ( "github.com/sustainable-computing-io/kepler/pkg/bpf" "github.com/sustainable-computing-io/kepler/pkg/collector" "github.com/sustainable-computing-io/kepler/pkg/collector/stats" + "github.com/sustainable-computing-io/kepler/pkg/config" "github.com/sustainable-computing-io/kepler/pkg/model" ) func benchmarkNtesting(b *testing.B, processNumber int) { + _, _ = config.Initialize(".") // enable metrics stats.SetMockedCollectorMetrics() // create node node metrics diff --git a/pkg/collector/stats/container_stats_test.go b/pkg/collector/stats/container_stats_test.go index a031b1a0e1..e48511cf69 100644 --- a/pkg/collector/stats/container_stats_test.go +++ b/pkg/collector/stats/container_stats_test.go @@ -24,6 +24,11 @@ import ( var _ = Describe("Test Container Metric", func() { + BeforeEach(func() { + _, err := config.Initialize(".") + Expect(err).NotTo(HaveOccurred()) + }) + It("Test ResetDeltaValues", func() { SetMockedCollectorMetrics() c := NewContainerStats("containerA", "podA", "test", "containerIDA") diff --git a/pkg/collector/stats/node_stats_test.go b/pkg/collector/stats/node_stats_test.go index a7f06e7db0..efce0a050c 100644 --- a/pkg/collector/stats/node_stats_test.go +++ b/pkg/collector/stats/node_stats_test.go @@ 
-31,6 +31,9 @@ var _ = Describe("Test Node Metric", func() { ) BeforeEach(func() { + _, err := config.Initialize(".") + Expect(err).NotTo(HaveOccurred()) + SetMockedCollectorMetrics() processMetrics = CreateMockedProcessStats(2) nodeMetrics = CreateMockedNodeStats() diff --git a/pkg/collector/stats/process_stats_test.go b/pkg/collector/stats/process_stats_test.go index c40820fd10..dc7afb4865 100644 --- a/pkg/collector/stats/process_stats_test.go +++ b/pkg/collector/stats/process_stats_test.go @@ -8,6 +8,11 @@ import ( var _ = Describe("ProcessMetric", func() { + BeforeEach(func() { + _, err := config.Initialize(".") + Expect(err).NotTo(HaveOccurred()) + }) + It("Test ResetDeltaValues", func() { SetMockedCollectorMetrics() metrics := CreateMockedProcessStats(1) diff --git a/pkg/collector/stats/stats_test.go b/pkg/collector/stats/stats_test.go index 6ed6e7fd69..c692be6bf1 100644 --- a/pkg/collector/stats/stats_test.go +++ b/pkg/collector/stats/stats_test.go @@ -8,7 +8,9 @@ import ( var _ = Describe("Stats", func() { It("Test InitAvailableParamAndMetrics", func() { - config.GetConfig() + _, err := config.Initialize(".") + Expect(err).NotTo(HaveOccurred()) + config.SetEnabledHardwareCounterMetrics(false) exp := []string{} Expect(len(GetProcessFeatureNames()) >= len(exp)).To(BeTrue()) diff --git a/pkg/collector/stats/test_utils.go b/pkg/collector/stats/test_utils.go index dd14bbf0ab..00d38b052c 100644 --- a/pkg/collector/stats/test_utils.go +++ b/pkg/collector/stats/test_utils.go @@ -31,7 +31,6 @@ const ( // SetMockedCollectorMetrics adds all metric to a process, otherwise it will not create the right usageMetric with all elements. The usageMetric is used in the Prediction Power Models // TODO: do not use a fixed usageMetric array in the power models, a structured data is more disarable. 
func SetMockedCollectorMetrics() { - config.GetConfig() if gpu := acc.GetActiveAcceleratorByType(config.GPU); gpu != nil { err := gpu.Device().Init() // create structure instances that will be accessed to create a processMetric klog.Fatalln(err) diff --git a/pkg/config/config.go b/pkg/config/config.go index 88a6dbeb8b..3785de6279 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -110,7 +110,20 @@ type Config struct { } // newConfig creates and returns a new Config instance. -func newConfig() *Config { +func newConfig() (*Config, error) { + absBaseDir, err := filepath.Abs(BaseDir) + if err != nil { + return nil, fmt.Errorf("failed to get absolute path for config-dir: %s: %w", BaseDir, err) + } + + s, err := os.Stat(absBaseDir) + if os.IsNotExist(err) { + return nil, fmt.Errorf("config-dir %s does not exist", BaseDir) + } + if !s.IsDir() { + return nil, fmt.Errorf("config-dir %s is not a directory", BaseDir) + } + return &Config{ ModelServerService: fmt.Sprintf("kepler-model-server.%s.svc.cluster.local", getConfig("KEPLER_NAMESPACE", defaultNamespace)), Kepler: getKeplerConfig(), @@ -121,20 +134,22 @@ func newConfig() *Config { Libvirt: getLibvirtConfig(), DCGMHostEngineEndpoint: getConfig("NVIDIA_HOSTENGINE_ENDPOINT", defaultDCGMHostEngineEndpoint), KernelVersion: float32(0), - } + }, nil } -// GetConfig returns the singleton Config instance, creating it if necessary. 
-func GetConfig() *Config { - once.Do(func() { - instance = newConfig() - }) +// Instance returns the singleton Config instance +func Instance() *Config { return instance } -// SetConfig replaces the global instance -func SetConfig(conf *Config) { - instance = conf +// Initialize initializes the global instance once and returns an error if +func Initialize(baseDir string) (*Config, error) { + var err error + once.Do(func() { + BaseDir = baseDir + instance, err = newConfig() + }) + return instance, err } func getKeplerConfig() KeplerConfig { @@ -229,7 +244,7 @@ func getConfig(key, defaultValue string) string { } // return config file value if there is one - configFile := filepath.Join(configDir, key) + configFile := filepath.Join(BaseDir, key) if value, err := os.ReadFile(configFile); err == nil { return strings.TrimSpace(bytes.NewBuffer(value).String()) } @@ -269,21 +284,19 @@ func logBoolConfigs() { } func LogConfigs() { + klog.V(5).Infof("config-dir: %s", BaseDir) logBoolConfigs() } func SetRedfishCredFilePath(credFilePath string) { - ensureConfigInitialized() instance.Redfish.CredFilePath = credFilePath } func SetRedfishProbeIntervalInSeconds(interval string) { - ensureConfigInitialized() instance.Redfish.ProbeIntervalInSeconds = interval } func SetRedfishSkipSSLVerify(skipSSLVerify bool) { - ensureConfigInitialized() instance.Redfish.SkipSSLVerify = skipSSLVerify } @@ -294,9 +307,9 @@ func SetEnabledEBPFCgroupID(enabled bool) { // set to false if any config source set it to false enabled = enabled && instance.Kepler.EnabledEBPFCgroupID klog.Infoln("using gCgroup ID in the BPF program:", enabled) - instance.KernelVersion = getKernelVersion(instance) + instance.KernelVersion = getKernelVersion(&realSystem{}) klog.Infoln("kernel version:", instance.KernelVersion) - if (enabled) && (instance.KernelVersion >= cGroupIDMinKernelVersion) && (isCGroupV2(instance)) { + if (enabled) && (instance.KernelVersion >= cGroupIDMinKernelVersion) && (isCGroupV2(&realSystem{})) { 
instance.Kepler.EnabledEBPFCgroupID = true } else { instance.Kepler.EnabledEBPFCgroupID = false @@ -305,7 +318,6 @@ func SetEnabledEBPFCgroupID(enabled bool) { // SetEnabledHardwareCounterMetrics enables the exposure of hardware counter metrics func SetEnabledHardwareCounterMetrics(enabled bool) { - ensureConfigInitialized() // set to false is any config source set it to false instance.Kepler.ExposeHardwareCounterMetrics = enabled && instance.Kepler.ExposeHardwareCounterMetrics } @@ -318,7 +330,6 @@ func SetEnabledHardwareCounterMetrics(enabled bool) { // Idle power prediction is limited to bare-metal or single VM setups. // Know the number of running VMs becomes crucial for achieving a fair distribution of idle power, particularly when following the GHG (Greenhouse Gas) protocol. func SetEnabledIdlePower(enabled bool) { - ensureConfigInitialized() // set to true is any config source set it to true or if system power metrics are available instance.Kepler.ExposeIdlePowerMetrics = enabled || instance.Kepler.ExposeIdlePowerMetrics if instance.Kepler.ExposeIdlePowerMetrics { @@ -328,54 +339,45 @@ func SetEnabledIdlePower(enabled bool) { // SetEnabledGPU enables the exposure of gpu metrics func SetEnabledGPU(enabled bool) { - ensureConfigInitialized() // set to true if any config source set it to true instance.Kepler.EnabledGPU = enabled || instance.Kepler.EnabledGPU } func SetModelServerEnable(enabled bool) { - ensureConfigInitialized() instance.Model.ModelServerEnable = enabled || instance.Model.ModelServerEnable } // SetEnabledMSR enables the exposure of MSR metrics func SetEnabledMSR(enabled bool) { - ensureConfigInitialized() // set to true if any config source set it to true instance.Kepler.EnabledMSR = enabled || instance.Kepler.EnabledMSR } // SetKubeConfig set kubeconfig file func SetKubeConfig(k string) { - ensureConfigInitialized() instance.Kepler.KubeConfig = k } // SetEnableAPIServer enables Kepler to watch apiserver func SetEnableAPIServer(enabled bool) { 
- ensureConfigInitialized() instance.Kepler.EnableAPIServer = enabled } func SetEstimatorConfig(modelName, selectFilter string) { - ensureConfigInitialized() instance.Kepler.EstimatorModel = modelName instance.Kepler.EstimatorSelectFilter = selectFilter } func SetModelServerEndpoint(serverEndpoint string) { - ensureConfigInitialized() instance.Model.ModelServerEndpoint = serverEndpoint } func SetMachineSpecFilePath(specFilePath string) { - ensureConfigInitialized() instance.Kepler.MachineSpecFilePath = specFilePath } // GetMachineSpec initializes a map of MachineSpecValues from MACHINE_SPEC func GetMachineSpec() *MachineSpec { - ensureConfigInitialized() if instance.Kepler.MachineSpecFilePath != "" { if spec, err := readMachineSpec(instance.Kepler.MachineSpecFilePath); err == nil { return spec @@ -398,13 +400,18 @@ func SetGPUUsageMetric(metric string) { instance.Metrics.GPUUsageMetric = metric } -func (c *Config) getUnixName() (unix.Utsname, error) { +type realSystem struct { +} + +var _ Client = &realSystem{} + +func (c *realSystem) getUnixName() (unix.Utsname, error) { var utsname unix.Utsname err := unix.Uname(&utsname) return utsname, err } -func (c *Config) getCgroupV2File() string { +func (c *realSystem) getCgroupV2File() string { return cGroupV2Path } @@ -449,7 +456,7 @@ func isCGroupV2(c Client) bool { // Get cgroup version, return 1 or 2 func GetCGroupVersion() int { - if isCGroupV2(instance) { + if isCGroupV2(&realSystem{}) { return 2 } else { return 1 @@ -458,70 +465,51 @@ func GetCGroupVersion() int { // InitModelConfigMap initializes map of config from MODEL_CONFIG func InitModelConfigMap() { - ensureConfigInitialized() if instance.Model.ModelConfigValues == nil { instance.Model.ModelConfigValues = GetModelConfigMap() } } -// EnsureConfigInitialized checks if the instance is initialized, and if not, initializes it. 
-func ensureConfigInitialized() { - if instance == nil { - once.Do(func() { - instance = newConfig() - }) - } -} - // IsIdlePowerEnabled always return true if Kepler has access to system power metrics. // However, if pre-trained power models are being used, Kepler should only expose metrics if the user is aware of the implications. func IsIdlePowerEnabled() bool { - ensureConfigInitialized() return instance.Kepler.ExposeIdlePowerMetrics } // IsExposeProcessStatsEnabled returns false if process metrics are disabled to minimize overhead in the Kepler standalone mode. func IsExposeProcessStatsEnabled() bool { - ensureConfigInitialized() return instance.Kepler.EnableProcessStats } // IsExposeContainerStatsEnabled returns false if container metrics are disabled to minimize overhead in the Kepler standalone mode. func IsExposeContainerStatsEnabled() bool { - ensureConfigInitialized() return instance.Kepler.ExposeContainerStats } // IsExposeVMStatsEnabled returns false if VM metrics are disabled to minimize overhead. func IsExposeVMStatsEnabled() bool { - ensureConfigInitialized() return instance.Kepler.ExposeVMStats } // IsExposeBPFMetricsEnabled returns false if BPF Metrics metrics are disabled to minimize overhead. func IsExposeBPFMetricsEnabled() bool { - ensureConfigInitialized() return instance.Kepler.ExposeBPFMetrics } // IsExposeComponentPowerEnabled returns false if component power metrics are disabled to minimize overhead. 
func IsExposeComponentPowerEnabled() bool { - ensureConfigInitialized() return instance.Kepler.ExposeComponentPower } func IsEnabledMSR() bool { - ensureConfigInitialized() return instance.Kepler.EnabledMSR } func IsModelServerEnabled() bool { - ensureConfigInitialized() return instance.Model.ModelServerEnable } func ModelServerEndpoint() string { - ensureConfigInitialized() return instance.Model.ModelServerEndpoint } @@ -541,31 +529,25 @@ func GetModelConfigMap() map[string]string { } func GetLibvirtMetadataURI() string { - ensureConfigInitialized() return instance.Libvirt.MetadataURI } func GetLibvirtMetadataToken() string { - ensureConfigInitialized() return instance.Libvirt.MetadataToken } func ExposeIRQCounterMetrics() bool { - ensureConfigInitialized() return instance.Kepler.ExposeIRQCounterMetrics } func GetBPFSampleRate() int { - ensureConfigInitialized() return instance.Kepler.BPFSampleRate } func GetRedfishCredFilePath() string { - ensureConfigInitialized() return instance.Redfish.CredFilePath } func GetRedfishProbeIntervalInSeconds() int { - ensureConfigInitialized() // convert string "redfishProbeIntervalInSeconds" to int probeInterval, err := strconv.Atoi(instance.Redfish.ProbeIntervalInSeconds) if err != nil { @@ -576,99 +558,79 @@ func GetRedfishProbeIntervalInSeconds() int { } func GetRedfishSkipSSLVerify() bool { - ensureConfigInitialized() return instance.Redfish.SkipSSLVerify } func GetMockACPIPowerPath() string { - ensureConfigInitialized() return instance.Kepler.MockACPIPowerPath } func ExposeHardwareCounterMetrics() bool { - ensureConfigInitialized() return instance.Kepler.ExposeHardwareCounterMetrics } func EnabledGPU() bool { - ensureConfigInitialized() return instance.Kepler.EnabledGPU } func SamplePeriodSec() uint64 { - ensureConfigInitialized() return instance.SamplePeriodSec } func CoreUsageMetric() string { - ensureConfigInitialized() return instance.Metrics.CoreUsageMetric } func DRAMUsageMetric() string { - ensureConfigInitialized() 
return instance.Metrics.DRAMUsageMetric } func GPUUsageMetric() string { - ensureConfigInitialized() return instance.Metrics.GPUUsageMetric } func CPUArchOverride() string { - ensureConfigInitialized() return instance.Kepler.CPUArchOverride } func GeneralUsageMetric() string { - ensureConfigInitialized() return instance.Metrics.GeneralUsageMetric } func KubeConfig() string { - ensureConfigInitialized() return instance.Kepler.KubeConfig } func EnabledEBPFCgroupID() bool { - ensureConfigInitialized() return instance.Kepler.EnabledEBPFCgroupID } func NodePlatformPowerKey() string { - ensureConfigInitialized() return instance.Model.NodePlatformPowerKey } func NodeComponentsPowerKey() string { - ensureConfigInitialized() return instance.Model.NodeComponentsPowerKey } func ContainerPlatformPowerKey() string { - ensureConfigInitialized() return instance.Model.ContainerPlatformPowerKey } func ModelConfigValues(k string) string { - ensureConfigInitialized() return instance.Model.ModelConfigValues[k] } func ContainerComponentsPowerKey() string { - ensureConfigInitialized() return instance.Model.ContainerComponentsPowerKey } func ProcessPlatformPowerKey() string { - ensureConfigInitialized() return instance.Model.ProcessPlatformPowerKey } func ProcessComponentsPowerKey() string { - ensureConfigInitialized() return instance.Model.ProcessComponentsPowerKey } func APIServerEnabled() bool { - ensureConfigInitialized() return instance.Kepler.EnableAPIServer } @@ -681,11 +643,9 @@ func BPFSwCounters() []string { } func DCGMHostEngineEndpoint() string { - ensureConfigInitialized() return instance.DCGMHostEngineEndpoint } func ExcludeSwapperProcess() bool { - ensureConfigInitialized() return instance.Kepler.ExcludeSwapperProcess } diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index bc338f3e3a..49d6b295a2 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -115,12 +115,11 @@ var _ = Describe("Test Configuration", func() { 
Expect(float32(-1)).To(Equal(getKernelVersion(mockc))) }) It("Test real kernel version", func() { - conf := GetConfig() // we assume running on Linux env should be bigger than 3.0 // env now, so make it 3.0 as minimum test: switch runtime.GOOS { case "linux": - Expect(true).To(Equal(getKernelVersion(conf) > 3.0)) + Expect(true).To(Equal(getKernelVersion(&realSystem{}) > 3.0)) default: // no test } diff --git a/pkg/config/types.go b/pkg/config/types.go index 30524ca58e..16c0dc567a 100644 --- a/pkg/config/types.go +++ b/pkg/config/types.go @@ -91,7 +91,6 @@ const ( MaxIRQ = 10 // defaultSamplePeriodSec is the time in seconds that the reader will wait before reading the metrics again defaultSamplePeriodSec = 3 - configDir = "/etc/kepler/kepler.config" defaultKubeConfig = "" defaultBPFSampleRate = 0 defaultCPUArchOverride = "" @@ -106,3 +105,5 @@ const ( DefaultMachineSpecFilePath = "/etc/kepler/models/machine/spec.json" defaultDCGMHostEngineEndpoint = "localhost:5555" ) + +var BaseDir string = "/etc/kepler/kepler.config" diff --git a/pkg/libvirt/resolve_vm_test.go b/pkg/libvirt/resolve_vm_test.go index 308f67ce15..64ed11cd0e 100644 --- a/pkg/libvirt/resolve_vm_test.go +++ b/pkg/libvirt/resolve_vm_test.go @@ -38,7 +38,8 @@ var _ = Describe("Test LibVirt", func() { BeforeEach(func() { mockProcDir = createTempDir() - config.GetConfig() + _, err := config.Initialize(".") + Expect(err).NotTo(HaveOccurred()) }) AfterEach(func() { diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index 37a99a1e51..f57e836ad4 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -4,14 +4,17 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" "github.com/sustainable-computing-io/kepler/pkg/bpf" + "github.com/sustainable-computing-io/kepler/pkg/config" ) var _ = Describe("Manager", func() { It("Should work properly", func() { + _, err := config.Initialize(".") + Expect(err).NotTo(HaveOccurred()) bpfExporter := bpf.NewMockExporter(bpf.DefaultSupportedMetrics()) CollectorManager := New(bpfExporter) - err := CollectorManager.Start() + err = CollectorManager.Start() Expect(err).NotTo(HaveOccurred()) }) diff --git a/pkg/model/benchmark_test.go b/pkg/model/benchmark_test.go index 64212f102b..a470bac183 100644 --- a/pkg/model/benchmark_test.go +++ b/pkg/model/benchmark_test.go @@ -22,6 +22,7 @@ import ( "github.com/sustainable-computing-io/kepler/pkg/bpf" "github.com/sustainable-computing-io/kepler/pkg/collector" "github.com/sustainable-computing-io/kepler/pkg/collector/stats" + "github.com/sustainable-computing-io/kepler/pkg/config" "github.com/sustainable-computing-io/kepler/pkg/model" ) @@ -30,6 +31,7 @@ const ( ) func benchmarkNtesting(b *testing.B, processNumber int) { + _, _ = config.Initialize(".") // enable metrics stats.SetMockedCollectorMetrics() // create node node metrics diff --git a/pkg/model/estimator/local/ratio_model_test.go b/pkg/model/estimator/local/ratio_model_test.go index 55e9d7a2c8..2d9f1d070c 100644 --- a/pkg/model/estimator/local/ratio_model_test.go +++ b/pkg/model/estimator/local/ratio_model_test.go @@ -26,6 +26,10 @@ import ( ) var _ = Describe("Test Ratio Unit", func() { + BeforeEach(func() { + _, err := config.Initialize(".") + Expect(err).NotTo(HaveOccurred()) + }) It("GetProcessEnergyRatio", func() { stats.SetMockedCollectorMetrics() processStats := stats.CreateMockedProcessStats(3) diff --git a/pkg/model/estimator/local/regressor/exponential_test.go b/pkg/model/estimator/local/regressor/exponential_test.go index 8a97b087c4..2e5a286d8b 100644 --- a/pkg/model/estimator/local/regressor/exponential_test.go +++ 
b/pkg/model/estimator/local/regressor/exponential_test.go @@ -20,6 +20,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "github.com/sustainable-computing-io/kepler/pkg/config" "github.com/sustainable-computing-io/kepler/pkg/model/types" ) @@ -29,6 +30,11 @@ var ( ) var _ = Describe("Test Exponential Predictor Unit", func() { + BeforeEach(func() { + _, err := config.Initialize(".") + Expect(err).ShouldNot(HaveOccurred()) + }) + It("Get Node Platform Power By Exponential Regression", func() { powers := GetNodePlatformPowerFromDummyServer(dummyExponentialWeightHandler, types.ExponentialTrainer) Expect(simplifyOutputInMilliJoules(powers[0])).Should(BeEquivalentTo(4000)) diff --git a/pkg/model/estimator/local/regressor/linear_test.go b/pkg/model/estimator/local/regressor/linear_test.go index 5e35eb38ea..407cfe16fe 100644 --- a/pkg/model/estimator/local/regressor/linear_test.go +++ b/pkg/model/estimator/local/regressor/linear_test.go @@ -20,10 +20,15 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "github.com/sustainable-computing-io/kepler/pkg/config" "github.com/sustainable-computing-io/kepler/pkg/model/types" ) var _ = Describe("Test Linear Predictor Unit", func() { + BeforeEach(func() { + _, err := config.Initialize(".") + Expect(err).ShouldNot(HaveOccurred()) + }) It("Get Node Platform Power By Linear Regression", func() { powers := GetNodePlatformPowerFromDummyServer(DummyWeightHandler, types.LinearRegressionTrainer) Expect(powers[0]).Should(BeEquivalentTo(3000)) diff --git a/pkg/model/estimator/local/regressor/logarithm_test.go b/pkg/model/estimator/local/regressor/logarithm_test.go index 0b31c92d36..22e860c030 100644 --- a/pkg/model/estimator/local/regressor/logarithm_test.go +++ b/pkg/model/estimator/local/regressor/logarithm_test.go @@ -20,6 +20,7 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + "github.com/sustainable-computing-io/kepler/pkg/config" "github.com/sustainable-computing-io/kepler/pkg/model/types" ) @@ -29,6 +30,11 @@ var ( ) var _ = Describe("Test Logarithmic Predictor Unit", func() { + BeforeEach(func() { + _, err := config.Initialize(".") + Expect(err).ShouldNot(HaveOccurred()) + }) + It("Get Node Platform Power By Logarithmic Regression", func() { powers := GetNodePlatformPowerFromDummyServer(dummyLogarithmicWeightHandler, types.LogarithmicTrainer) Expect(simplifyOutputInMilliJoules(powers[0])).Should(BeEquivalentTo(2000)) diff --git a/pkg/model/estimator/local/regressor/logistic_test.go b/pkg/model/estimator/local/regressor/logistic_test.go index 6d4df59038..ed2f318062 100644 --- a/pkg/model/estimator/local/regressor/logistic_test.go +++ b/pkg/model/estimator/local/regressor/logistic_test.go @@ -20,6 +20,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "github.com/sustainable-computing-io/kepler/pkg/config" "github.com/sustainable-computing-io/kepler/pkg/model/types" ) @@ -29,6 +30,11 @@ var ( ) var _ = Describe("Test Logistic Predictor Unit", func() { + BeforeEach(func() { + _, err := config.Initialize(".") + Expect(err).ShouldNot(HaveOccurred()) + }) + It("Get Node Platform Power By Logistic Regression", func() { powers := GetNodePlatformPowerFromDummyServer(dummyLogisticWeightHandler, types.LogisticTrainer) Expect(simplifyOutputInMilliJoules(powers[0])).Should(BeEquivalentTo(2000)) diff --git a/pkg/model/estimator/local/regressor/regressor_test.go b/pkg/model/estimator/local/regressor/regressor_test.go index 8f2d24eb77..eea04d799a 100644 --- a/pkg/model/estimator/local/regressor/regressor_test.go +++ b/pkg/model/estimator/local/regressor/regressor_test.go @@ -122,7 +122,6 @@ func genHandlerFunc(curvefit []float64, trainerName string) (handlerFunc func(w } func genRegressor(outputType types.ModelOutputType, energySource, modelServerEndpoint, modelWeightsURL, modelWeightFilepath, 
trainerName string) Regressor { - config.GetConfig() config.SetModelServerEnable(true) config.SetModelServerEndpoint(modelServerEndpoint) return Regressor{ @@ -169,6 +168,12 @@ func GetNodeComponentsPowerFromDummyServer(handler http.HandlerFunc, trainer str } var _ = Describe("Test Regressor Weight Unit (default trainer)", func() { + + BeforeEach(func() { + _, err := config.Initialize(".") + Expect(err).ShouldNot(HaveOccurred()) + }) + Context("with dummy model server", func() { It("Get Node Platform Power By Default Regression with ModelServerEndpoint", func() { powers := GetNodePlatformPowerFromDummyServer(DummyWeightHandler, "") diff --git a/pkg/model/process_energy_test.go b/pkg/model/process_energy_test.go index 38ceb299b2..a723e1fff4 100644 --- a/pkg/model/process_energy_test.go +++ b/pkg/model/process_energy_test.go @@ -36,6 +36,8 @@ var _ = Describe("ProcessPower", func() { Context("with manually defined node power", func() { BeforeEach(func() { + _, err := config.Initialize(".") + Expect(err).NotTo(HaveOccurred()) // we need to disable the system real time power metrics for testing since we add mock values or use power model estimator components.SetIsSystemCollectionSupported(false) platform.SetIsSystemCollectionSupported(false) diff --git a/pkg/sensors/accelerator/accelerator_test.go b/pkg/sensors/accelerator/accelerator_test.go index 59f1ed96f2..145a45fd14 100644 --- a/pkg/sensors/accelerator/accelerator_test.go +++ b/pkg/sensors/accelerator/accelerator_test.go @@ -18,6 +18,7 @@ package accelerator import ( "testing" + "github.com/sustainable-computing-io/kepler/pkg/config" "github.com/sustainable-computing-io/kepler/pkg/sensors/accelerator/devices" ) @@ -73,6 +74,10 @@ func TestRegistry(t *testing.T) { }, } + if _, err := config.Initialize("."); err != nil { + t.Fatal(err) + } + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var a Accelerator @@ -134,6 +139,10 @@ func TestActiveAcceleratorByType(t *testing.T) { }, } + if _, err := 
config.Initialize("."); err != nil { + t.Fatal(err) + } + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { tt.setup() @@ -172,6 +181,10 @@ func TestCreateAndRegister(t *testing.T) { }, } + if _, err := config.Initialize("."); err != nil { + t.Fatal(err) + } + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { registry := tt.setup() @@ -211,6 +224,9 @@ func TestShutdown(t *testing.T) { }, }, } + if _, err := config.Initialize("."); err != nil { + t.Fatal(err) + } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -228,6 +244,9 @@ func TestShutdown(t *testing.T) { } func TestAcceleratorMethods(t *testing.T) { + if _, err := config.Initialize("."); err != nil { + t.Fatal(err) + } registry := &Registry{ Registry: map[string]Accelerator{}, } diff --git a/pkg/sensors/platform/source/redfish_test.go b/pkg/sensors/platform/source/redfish_test.go index 93542a81a5..2caf3d2bd0 100644 --- a/pkg/sensors/platform/source/redfish_test.go +++ b/pkg/sensors/platform/source/redfish_test.go @@ -27,7 +27,10 @@ import ( ) func TestRedFishClient_IsPowerSupported(t *testing.T) { - config.GetConfig() + if _, err := config.Initialize("."); err != nil { + t.Fatal(err) + } + // Create a mock HTTP server server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path == "/redfish/v1/Systems" { diff --git a/pkg/sensors/power_suite_test.go b/pkg/sensors/power_suite_test.go index e0869b04ae..97fdfa62c4 100644 --- a/pkg/sensors/power_suite_test.go +++ b/pkg/sensors/power_suite_test.go @@ -5,9 +5,13 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "github.com/sustainable-computing-io/kepler/pkg/config" ) func TestPower(t *testing.T) { + if _, err := config.Initialize("."); err != nil { + t.Fatal(err) + } RegisterFailHandler(Fail) RunSpecs(t, "Power Suite") }