From aa822eef6c2122c1ac0a9f744d64f22d7b1bd4b0 Mon Sep 17 00:00:00 2001 From: Sunyanan Choochotkaew Date: Thu, 29 Aug 2024 00:11:47 +0900 Subject: [PATCH] feat: compute core ratio for local regressor (#1743) Signed-off-by: Sunyanan Choochotkaew --- pkg/config/config_test.go | 2 +- pkg/config/spec.go | 4 +- .../local/regressor/model_weights.go | 27 ++++------- .../estimator/local/regressor/regressor.go | 27 ++++++++--- .../local/regressor/regressor_test.go | 47 ++++++++++++++++++- pkg/model/model.go | 5 +- 6 files changed, 80 insertions(+), 32 deletions(-) diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 1d13ba8827..50836f91b3 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -127,7 +127,7 @@ var _ = Describe("Test Configuration", func() { It("Test machine spec generation and read", func() { tmpPath := "./test_spec" // generate spec - spec := generateSpec() + spec := GenerateSpec() Expect(spec).NotTo(BeNil()) err := spec.saveToFile(tmpPath) Expect(err).To(BeNil()) diff --git a/pkg/config/spec.go b/pkg/config/spec.go index 7ce6d508d6..da114f3c9b 100644 --- a/pkg/config/spec.go +++ b/pkg/config/spec.go @@ -90,7 +90,7 @@ func roundToNearestHundred(value float64) int { return int(math.Round(value/100) * 100) } -func generateSpec() *MachineSpec { +func GenerateSpec() *MachineSpec { spec := &MachineSpec{} cpus, err := cpu.Info() @@ -134,7 +134,7 @@ func getDefaultMachineSpec() *MachineSpec { klog.Errorf("failed to read default spec from %s: %v", DefaultMachineSpecFilePath, err) } } - return generateSpec() + return GenerateSpec() } func readMachineSpec(path string) (*MachineSpec, error) { diff --git a/pkg/model/estimator/local/regressor/model_weights.go b/pkg/model/estimator/local/regressor/model_weights.go index b4de227dcb..457b0fab79 100644 --- a/pkg/model/estimator/local/regressor/model_weights.go +++ b/pkg/model/estimator/local/regressor/model_weights.go @@ -19,6 +19,8 @@ package regressor import ( "errors" "fmt" + + "github.com/sustainable-computing-io/kepler/pkg/config" ) var ( @@ -95,25 +97,14 @@ type NormalizedNumericalFeature struct { Weight float64 `json:"weight,omitempty"` } -// TODO: remove when PR #1684 merge -type MachineSpec struct { - Vendor string `json:"vendor"` - Processor string `json:"processor"` - Cores int `json:"cores"` - Chips int `json:"chips"` - Memory int `json:"memory"` - Frequency int `json:"frequency"` - ThreadsPerCore int `json:"threads_per_core"` -} - type ComponentModelWeights struct { - ModelName string `json:"model_name,omitempty"` - ModelMachineSpec *MachineSpec `json:"machine_spec,omitempty"` - Platform *ModelWeights `json:"platform,omitempty"` - Core *ModelWeights `json:"core,omitempty"` - Uncore *ModelWeights `json:"uncore,omitempty"` - Package *ModelWeights `json:"package,omitempty"` - DRAM *ModelWeights `json:"dram,omitempty"` + ModelName string `json:"model_name,omitempty"` + ModelMachineSpec *config.MachineSpec `json:"machine_spec,omitempty"` + Platform *ModelWeights `json:"platform,omitempty"` + Core *ModelWeights `json:"core,omitempty"` + Uncore *ModelWeights `json:"uncore,omitempty"` + Package *ModelWeights `json:"package,omitempty"` + DRAM *ModelWeights `json:"dram,omitempty"` } func (w ComponentModelWeights) String() string { diff --git a/pkg/model/estimator/local/regressor/regressor.go b/pkg/model/estimator/local/regressor/regressor.go index 212c1bd642..56bdbeeebd 100644 --- a/pkg/model/estimator/local/regressor/regressor.go +++ b/pkg/model/estimator/local/regressor/regressor.go @@ -70,11 +70,12 @@ type Regressor struct { // xidx represents the instance slide window position, where an instance can be process/process/pod/node xidx int - enabled bool - modelWeight *ComponentModelWeights - coreRatio float64 - modelPredictors map[string]Predictor - *config.MachineSpec + enabled bool + modelWeight *ComponentModelWeights + coreRatio float64 + modelPredictors map[string]Predictor + RequestMachineSpec *config.MachineSpec + DiscoveredMachineSpec *config.MachineSpec } // Start returns nil if model weight is obtainable @@ -153,7 +154,7 @@ func (r *Regressor) getWeightFromServer() (*ComponentModelWeights, error) { TrainerName: r.TrainerName, SelectFilter: r.SelectFilter, Weight: true, - MachineSpec: *r.MachineSpec, + MachineSpec: *r.RequestMachineSpec, } modelRequestJSON, err := json.Marshal(modelRequest) if err != nil { @@ -187,7 +188,7 @@ func (r *Regressor) getWeightFromServer() (*ComponentModelWeights, error) { if weightResponse.ModelName != "" { klog.V(3).Infof("Using weights trained by %s", weightResponse.ModelName) } - // TODO: set r.coreRatio from discover spec/model machine spec based on PR #1684 + r.updateCoreRatio(weightResponse.ModelMachineSpec) return &weightResponse, nil } @@ -209,6 +210,7 @@ func (r *Regressor) loadWeightFromURLorLocal() (*ComponentModelWeights, error) { if err != nil { return nil, fmt.Errorf("model unmarshal error: %v (%s)", err, string(body)) } + r.updateCoreRatio(content.ModelMachineSpec) return &content, nil } @@ -317,6 +319,17 @@ func (r *Regressor) GetComponentsPower(isIdlePower bool) ([]source.NodeComponent return nodeComponentsPower, nil } +// updateCoreRatio sets coreRatio attribute as a ratio of the discovered number of cores over the cores of machine used for training a model +func (r *Regressor) updateCoreRatio(mSpec *config.MachineSpec) { + if mSpec == nil || r.DiscoveredMachineSpec == nil { + return + } + if r.DiscoveredMachineSpec.Cores > 0 && mSpec.Cores >= r.DiscoveredMachineSpec.Cores { + r.coreRatio = float64(r.DiscoveredMachineSpec.Cores) / float64(mSpec.Cores) + klog.Infof("Update core ratio to %.2f for computing %s idle power", r.coreRatio, r.EnergySource) + } +} + // GetComponentsPower returns GPU Power in Watts associated to each each process func (r *Regressor) GetGPUPower(isIdlePower bool) ([]uint64, error) { return []uint64{}, fmt.Errorf("current power model does not support GPUs") diff --git a/pkg/model/estimator/local/regressor/regressor_test.go b/pkg/model/estimator/local/regressor/regressor_test.go index 630a09f52e..f7126366b7 100644 --- a/pkg/model/estimator/local/regressor/regressor_test.go +++ b/pkg/model/estimator/local/regressor/regressor_test.go @@ -65,11 +65,19 @@ var ( SampleDramNumbericalVars = map[string]NormalizedNumericalFeature{ "cache_miss": {Weight: 1.0, Scale: 2}, } - DummyWeightHandler = http.HandlerFunc(genHandlerFunc([]float64{})) + DummyWeightHandler = http.HandlerFunc(genHandlerFunc([]float64{})) + DummyModelName = "dummy" + ModelCores = config.GenerateSpec().Cores + ExpectedAbsPowerFromDummyWeightHandler = 2500 + ExpectedIdlePowerFromDummyWeightHandler = 2000 ) func GenPlatformModelWeights(curveFitWeights []float64) ComponentModelWeights { return ComponentModelWeights{ + ModelName: DummyModelName, + ModelMachineSpec: &config.MachineSpec{ + Cores: ModelCores, + }, Platform: genWeights(SampleCoreNumericalVars, curveFitWeights), } } @@ -127,7 +135,8 @@ func genRegressor(outputType types.ModelOutputType, energySource, modelServerEnd ModelWeightsURL: modelWeightsURL, ModelWeightsFilepath: modelWeightFilepath, TrainerName: trainerName, - MachineSpec: config.GetMachineSpec(), + RequestMachineSpec: config.GetMachineSpec(), + DiscoveredMachineSpec: config.GenerateSpec(), } } @@ -274,4 +283,38 @@ var _ = Describe("Test Regressor Weight Unit (default trainer)", func() { Expect(err).NotTo(HaveOccurred()) }) }) + + Context("with core ratio", Ordered, func() { + DescribeTable("Test core ratio computation", func(discoveredCore, modelCores int, expectedCoreRatio float64) { + ModelCores = modelCores + testServer := httptest.NewServer(DummyWeightHandler) + modelWeightFilepath := config.GetDefaultPowerModelURL(types.DynPower.String(), types.PlatformEnergySource) + r := genRegressor(types.DynPower, types.PlatformEnergySource, testServer.URL, "", modelWeightFilepath, "") + r.DiscoveredMachineSpec = &config.MachineSpec{ + Cores: discoveredCore, + } + err := r.Start() + Expect(err).To(BeNil()) + r.ResetSampleIdx() + for _, processFeatureValues := range processFeatureValues { + r.AddProcessFeatureValues(processFeatureValues) // add samples to the power model + } + powers, err := r.GetPlatformPower(false) + Expect(err).NotTo(HaveOccurred()) + Expect(len(powers)).Should(Equal(len(processFeatureValues))) + // TODO: verify if the power makes sense + Expect(powers[0]).Should(BeEquivalentTo(ExpectedAbsPowerFromDummyWeightHandler)) + idlePowers, err := r.GetPlatformPower(true) + Expect(err).NotTo(HaveOccurred()) + Expect(len(idlePowers)).Should(Equal(len(processFeatureValues))) + expectedIdlePower := uint64(float64(ExpectedIdlePowerFromDummyWeightHandler) * expectedCoreRatio) + Expect(idlePowers[0]).Should(BeEquivalentTo(expectedIdlePower)) + }, + Entry("equal core", 16, 16, 1.0), + Entry("VM core ratio 0.25)", 4, 16, 0.25), + Entry("VM core ratio 4)", 16, 4, 1.0), + Entry("invalid discovered core", 0, 16, 1.0), + Entry("invalid model core", 16, 0, 1.0), + ) + }) }) diff --git a/pkg/model/model.go b/pkg/model/model.go index 5db81b2608..7c6f4d4a15 100644 --- a/pkg/model/model.go +++ b/pkg/model/model.go @@ -114,14 +114,15 @@ func createPowerModelEstimator(modelConfig *types.ModelConfig) (PowerModelInterf FloatFeatureNames: featuresNames, SystemMetaDataFeatureNames: modelConfig.SystemMetaDataFeatureNames, SystemMetaDataFeatureValues: modelConfig.SystemMetaDataFeatureValues, - MachineSpec: config.GetMachineSpec(), + RequestMachineSpec: config.GetMachineSpec(), + DiscoveredMachineSpec: config.GenerateSpec(), } err := model.Start() if err != nil { return nil, err } klog.V(3).Infof("Using Power Model %s", modelConfig.ModelOutputType.String()) - klog.Infof("Machine Spec: %v", model.MachineSpec) + klog.Infof("Requesting for Machine Spec: %v", model.RequestMachineSpec) return model, nil case types.EstimatorSidecar: