[GSOC] hyperopt suggestion service logic update #2412

Status: Open. Wants to merge 27 commits into base: master.
Commits (27, all by shashank-iitbhu):

f615e3f  hyperopt suggestion logic update (Aug 21, 2024)
a8bc887  Merge upstream master and resolve conflicts in base_service.py and se… (Aug 25, 2024)
a67f373  fix (Aug 25, 2024)
365c2f5  DISTRIBUTION_UNKNOWN enum set to 0 in gRPC api (Aug 26, 2024)
caa2422  convert parameter method fix (Aug 26, 2024)
0f38a51  convert feasibleSpace func updated (Sep 3, 2024)
ae9fa34  renamed DISTRIBUTION_UNKNOWN to DISTRIBUTION_UNSPECIFIED (Sep 3, 2024)
910a46c  fix (Sep 3, 2024)
08b01ac  added more test cases for hyperopt distributions (Sep 6, 2024)
16dc030  added support for NORMAL and LOG_NORMAL in hyperopt suggestion service (Sep 7, 2024)
282f81d  added e2e tests for NORMAL and LOG_NORMAL (Sep 7, 2024)
b7d09a6  hyperopt-suggestion example update (Sep 19, 2024)
58ab1ac  updated logic for log distributions (Sep 19, 2024)
2b1932e  updated logic for log distributions (Sep 19, 2024)
2f1c355  e2e test fixed (Sep 22, 2024)
8391c29  added support for parameter distributions for Parameter type INT (Sep 22, 2024)
23fd30b  unit test fixed (Sep 22, 2024)
7f6deb5  Update pkg/suggestion/v1beta1/hyperopt/base_service.py (Sep 22, 2024)
b85b4bf  comment fixed (Sep 22, 2024)
dc36303  added unit tests for INT parameter type (Sep 22, 2024)
658daaf  completed param unit test cases (Sep 22, 2024)
5198ad1  handled default case for normal distributions when min or max are not… (Sep 23, 2024)
262912d  fixed validation logic for min and max (Oct 4, 2024)
81f5526  removed unnecessary test params (Oct 6, 2024)
748e4ba  fixes (Oct 8, 2024)
14f30a5  added comments (Oct 10, 2024)
4f35663  fix (Oct 12, 2024)
2 changes: 2 additions & 0 deletions .github/workflows/e2e-test-pytorch-mnist.yaml
@@ -41,5 +41,7 @@ jobs:
         - "long-running-resume,from-volume-resume,median-stop"
         # others
         - "grid,bayesian-optimization,tpe,multivariate-tpe,cma-es,hyperband"
+        - "hyperopt-distribution"
         - "file-metrics-collector,pytorchjob-mnist"
         - "median-stop-with-json-format,file-metrics-collector-with-json-format"

74 changes: 74 additions & 0 deletions examples/v1beta1/hp-tuning/hyperopt-distribution.yaml
@@ -0,0 +1,74 @@
---
apiVersion: kubeflow.org/v1beta1
kind: Experiment
metadata:
  namespace: kubeflow
  name: hyperopt-distribution
spec:
  objective:
    type: minimize
    goal: 0.05
    objectiveMetricName: loss
  algorithm:
    algorithmName: random
  parallelTrialCount: 3
  maxTrialCount: 12
  maxFailedTrialCount: 3
  parameters:
    - name: lr
      parameterType: double
      feasibleSpace:
        min: "0.01"
        max: "0.05"
        step: "0.01"
        distribution: normal
    - name: momentum
      parameterType: double
      feasibleSpace:
        min: "0.001"
        max: "1"
        distribution: uniform
    - name: epochs
      parameterType: int
      feasibleSpace:
        min: "1"
        max: "3"
        distribution: logUniform
    - name: batch_size
      parameterType: int
      feasibleSpace:
        min: "32"
        max: "64"
        distribution: logNormal
  trialTemplate:
    primaryContainerName: training-container
    trialParameters:
      - name: learningRate
        description: Learning rate for the training model
        reference: lr
      - name: momentum
        description: Momentum for the training model
        reference: momentum
      - name: epochs
        description: Epochs
        reference: epochs
      - name: batchSize
        description: Batch Size
        reference: batch_size
    trialSpec:
      apiVersion: batch/v1
      kind: Job
      spec:
        template:
          spec:
            containers:
              - name: training-container
                image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
                command:
                  - "python3"
                  - "/opt/pytorch-mnist/mnist.py"
                  - "--epochs=${trialParameters.epochs}"
                  - "--batch-size=${trialParameters.batchSize}"
                  - "--lr=${trialParameters.learningRate}"
                  - "--momentum=${trialParameters.momentum}"
            restartPolicy: Never
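Taken together, the example exercises every new distribution: normal with a step for lr (which takes the quantized qnormal path in the suggestion service), uniform for momentum, and logUniform and logNormal for the int parameters epochs and batch_size.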
180 changes: 90 additions & 90 deletions pkg/apis/manager/v1beta1/api.pb.go

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions pkg/apis/manager/v1beta1/api.proto
@@ -101,11 +101,11 @@ enum ParameterType {
  * Distribution types for HyperParameter.
  */
 enum Distribution {
-  UNIFORM = 0;
-  LOG_UNIFORM = 1;
-  NORMAL = 2;
-  LOG_NORMAL = 3;
-  DISTRIBUTION_UNKNOWN = 4;
+  DISTRIBUTION_UNSPECIFIED = 0;
+  UNIFORM = 1;
+  LOG_UNIFORM = 2;
+  NORMAL = 3;
+  LOG_NORMAL = 4;
 }
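Worth noting: proto3 requires the first enum value to be 0, and 0 is the implicit default on the wire, so the unspecified variant is the natural zero value. A FeasibleSpace that never sets a distribution now decodes as DISTRIBUTION_UNSPECIFIED rather than silently reading as UNIFORM.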

/**
24 changes: 12 additions & 12 deletions pkg/apis/manager/v1beta1/python/api_pb2.py

Large diffs are not rendered by default.
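(Both api.pb.go above and api_pb2.py here are generated from api.proto, so these large diffs are just the regenerated language bindings for the enum change.)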

4 changes: 2 additions & 2 deletions pkg/apis/manager/v1beta1/python/api_pb2.pyi
@@ -16,11 +16,11 @@ class ParameterType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):

 class Distribution(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
     __slots__ = ()
+    DISTRIBUTION_UNSPECIFIED: _ClassVar[Distribution]
     UNIFORM: _ClassVar[Distribution]
     LOG_UNIFORM: _ClassVar[Distribution]
     NORMAL: _ClassVar[Distribution]
     LOG_NORMAL: _ClassVar[Distribution]
-    DISTRIBUTION_UNKNOWN: _ClassVar[Distribution]

 class ObjectiveType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
     __slots__ = ()
@@ -39,11 +39,11 @@
 DOUBLE: ParameterType
 INT: ParameterType
 DISCRETE: ParameterType
 CATEGORICAL: ParameterType
+DISTRIBUTION_UNSPECIFIED: Distribution
 UNIFORM: Distribution
 LOG_UNIFORM: Distribution
 NORMAL: Distribution
 LOG_NORMAL: Distribution
-DISTRIBUTION_UNKNOWN: Distribution
 UNKNOWN: ObjectiveType
 MINIMIZE: ObjectiveType
 MAXIMIZE: ObjectiveType
@@ -532,22 +532,12 @@ func convertParameterType(typ experimentsv1beta1.ParameterType) suggestionapi.Pa
 }

 func convertFeasibleSpace(fs experimentsv1beta1.FeasibleSpace) *suggestionapi.FeasibleSpace {
-	distribution := convertDistribution(fs.Distribution)
-	if distribution == suggestionapi.Distribution_DISTRIBUTION_UNKNOWN {
-		return &suggestionapi.FeasibleSpace{
-			Max:  fs.Max,
-			Min:  fs.Min,
-			List: fs.List,
-			Step: fs.Step,
-		}
-	}
-
 	return &suggestionapi.FeasibleSpace{
 		Max:  fs.Max,
 		Min:  fs.Min,
 		List: fs.List,
 		Step: fs.Step,
-		Distribution: distribution,
+		Distribution: convertDistribution(fs.Distribution),
 	}
 }

@@ -562,7 +552,7 @@ func convertDistribution(typ experimentsv1beta1.Distribution) suggestionapi.Dist
 	case experimentsv1beta1.DistributionLogNormal:
 		return suggestionapi.Distribution_LOG_NORMAL
 	default:
-		return suggestionapi.Distribution_DISTRIBUTION_UNKNOWN
+		return suggestionapi.Distribution_DISTRIBUTION_UNSPECIFIED
 	}
 }
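Since DISTRIBUTION_UNSPECIFIED is now the enum's zero value, convertFeasibleSpace can always set the Distribution field: explicitly writing the unspecified value is indistinguishable on the wire from leaving it unset, which is what lets the old special-case branch above be removed.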

@@ -618,7 +618,7 @@ func TestConvertDistribution(t *testing.T) {
 		},
 		{
 			inDistribution:       experimentsv1beta1.DistributionUnknown,
-			expectedDistribution: suggestionapi.Distribution_DISTRIBUTION_UNKNOWN,
+			expectedDistribution: suggestionapi.Distribution_DISTRIBUTION_UNSPECIFIED,
 			testDescription:      "Convert unknown distribution",
 		},
 	}
91 changes: 83 additions & 8 deletions pkg/suggestion/v1beta1/hyperopt/base_service.py
@@ -13,10 +13,12 @@
 # limitations under the License.

 import logging
+import math

 import hyperopt
 import numpy as np

+from pkg.apis.manager.v1beta1.python import api_pb2
 from pkg.suggestion.v1beta1.internal.constant import (
     CATEGORICAL,
     DISCRETE,
@@ -62,14 +64,87 @@ def create_hyperopt_domain(self):
         #     hyperopt.hp.uniform('x2', -10, 10)}
         hyperopt_search_space = {}
         for param in self.search_space.params:
-            if param.type == INTEGER:
-                hyperopt_search_space[param.name] = hyperopt.hp.quniform(
-                    param.name, float(param.min), float(param.max), float(param.step)
-                )
-            elif param.type == DOUBLE:
-                hyperopt_search_space[param.name] = hyperopt.hp.uniform(
-                    param.name, float(param.min), float(param.max)
-                )
+            if param.type in [INTEGER, DOUBLE]:
+                if param.distribution == api_pb2.UNIFORM or param.distribution is None:
[Review comment from a Member on lines 65 to 68]: Should we set the uniform distribution as the default one? We can mutate the Uniform distribution in the Experiment defaulter: https://github.com/kubeflow/katib/blob/master/pkg/apis/controller/experiments/v1beta1/experiment_defaults.go. For example, in Optuna the Uniform distribution will be removed in favour of FloatDistribution: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.distributions.UniformDistribution.html

+                    # Uniform distribution: values are sampled between min and max.
+                    # If step is defined, we use the quantized version quniform.
+                    if param.step:
+                        hyperopt_search_space[param.name] = hyperopt.hp.quniform(
+                            param.name,
+                            float(param.min),
+                            float(param.max),
+                            float(param.step),
+                        )
+                    elif param.type == INTEGER:
+                        hyperopt_search_space[param.name] = hyperopt.hp.uniformint(
+                            param.name, float(param.min), float(param.max)
+                        )
+                    else:
+                        hyperopt_search_space[param.name] = hyperopt.hp.uniform(
+                            param.name, float(param.min), float(param.max)
+                        )
+                elif param.distribution == api_pb2.LOG_UNIFORM:
+                    # Log-uniform distribution: used for parameters that vary exponentially.
+                    # We convert min and max to their logarithmic scale using math.log, because
+                    # the log-uniform distribution is applied over the logarithmic range.
+                    if param.step:
+                        hyperopt_search_space[param.name] = hyperopt.hp.qloguniform(
+                            param.name,
+                            math.log(float(param.min)),
+                            math.log(float(param.max)),
+                            float(param.step),
+                        )
+                    else:
+                        hyperopt_search_space[param.name] = hyperopt.hp.loguniform(
+                            param.name,
+                            math.log(float(param.min)),
+                            math.log(float(param.max)),
+                        )
+                elif param.distribution == api_pb2.NORMAL:
+                    # Normal distribution: used when values are centered around the mean (mu)
+                    # and spread out by sigma. We calculate mu as the midpoint between
+                    # min and max, and sigma as (max - min) / 6. This is based on the assumption
+                    # that 99.7% of the values in a normal distribution fall within ±3 sigma.
+                    mu = (float(param.min) + float(param.max)) / 2
+                    sigma = (float(param.max) - float(param.min)) / 6
[Review thread on the sigma formula]

shashank-iitbhu (Author, Sep 11, 2024): I followed this article to determine the value of sigma from min and max. cc @tenzen-y @andreyvelich

Member: Maybe we should add this article to the comments. WDYT @tenzen-y @johnugeorge?

Member: I would rather not depend on an individual article. Instead, it would be better to add an actual mathematical description here as a comment.
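As a quick sanity check of that formula (not part of the diff), take the lr parameter from the example Experiment, with min = 0.01 and max = 0.05:

    mu = (0.01 + 0.05) / 2 = 0.03
    sigma = (0.05 - 0.01) / 6 ≈ 0.00667

so mu ± 3·sigma = [0.01, 0.05] recovers the declared feasible range, which is exactly the three-sigma (99.7%) assumption stated in the code comment.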

+                    if param.step:
+                        hyperopt_search_space[param.name] = hyperopt.hp.qnormal(
+                            param.name,
+                            mu,
+                            sigma,
+                            float(param.step),
+                        )
+                    else:
+                        hyperopt_search_space[param.name] = hyperopt.hp.normal(
+                            param.name,
+                            mu,
+                            sigma,
+                        )
+                elif param.distribution == api_pb2.LOG_NORMAL:
+                    # Log-normal distribution: applies when the logarithm
+                    # of the parameter follows a normal distribution.
+                    # We convert min and max to logarithmic scale and calculate
+                    # mu and sigma similarly to the normal distribution,
+                    # but on the log-transformed values to ensure the distribution is correct.
+                    log_min = math.log(float(param.min))
+                    log_max = math.log(float(param.max))
[Review thread on lines +130 to +131]

Member: Shouldn't we use a fixed value when min and max are scalars, the same as Nevergrad does?

shashank-iitbhu (Author): Yeah, we can, but that was an edge case for when min and max are not defined in Nevergrad:

    elif isinstance(param, (p.Log, p.Scalar)):
        if (param.bounds[0][0] is None) or (param.bounds[1][0] is None):
            if isinstance(param, p.Scalar) and not param.integer:
                return hp.lognormal(label=param_name, mu=0, sigma=1)

For example:

    - name: batch_size
      parameterType: int
      feasibleSpace:
        min: "32"
        max: "64"
        distribution: "logNormal"

The above parameter will be sampled from the log-normal density shown in the attached screenshot [screenshot omitted], where mu = 3.8123 and sigma = 0.3465 are calculated by plugging min = 32 and max = 64 into our code, and E(X) represents the mean, which is 48 in our case.

Member: That makes sense. In that case, could you address the cases where min or max is not specified, as well as nevergrad?
https://github.com/facebookresearch/nevergrad/blob/a2006e50b068fe598e0f3d7dab9c9bcf6cf97e00/nevergrad/optimization/externalbo.py#L61-L64

Member: @shashank-iitbhu This is still pending.

shashank-iitbhu (Author):

    if param.FeasibleSpace.Max == "" && param.FeasibleSpace.Min == "" {
        allErrs = append(allErrs, field.Required(parametersPath.Index(i).Child("feasibleSpace").Child("max"),
            fmt.Sprintf("feasibleSpace.max or feasibleSpace.min must be specified for parameterType: %v", param.ParameterType)))
    }

The webhook validator requires feasibleSpace.max or feasibleSpace.min to be specified.

Member: But when either min or max is empty, this validation does not reject the request, right? So shouldn't we implement the special case in the Suggestion Service?

shashank-iitbhu (Author): Yes, the validation webhook does not reject the request when either min or max is empty. But I created an example where:

    - name: batch_size
      parameterType: int
      feasibleSpace:
        min: "32"
        distribution: "logNormal"

For this, the Experiment is created but the suggestion service does not sample any value, so the trials never run, even though this case (when either min or max is not specified) is handled in pkg/suggestion/v1beta1/hyperopt/base_service.py. Do we need to check the experiment_defaults.go file?
https://github.com/kubeflow/katib/blob/867c40a1b0669446c774cd6e770a5b7bbf1eb2f1/pkg/apis/controller/experiments/v1beta1/experiment_defaults.go
+                    mu = (log_min + log_max) / 2
+                    sigma = (log_max - log_min) / 6
+
+                    if param.step:
+                        hyperopt_search_space[param.name] = hyperopt.hp.qlognormal(
+                            param.name,
+                            mu,
+                            sigma,
+                            float(param.step),
+                        )
+                    else:
+                        hyperopt_search_space[param.name] = hyperopt.hp.lognormal(
+                            param.name,
+                            mu,
+                            sigma,
+                        )
             elif param.type == CATEGORICAL or param.type == DISCRETE:
                 hyperopt_search_space[param.name] = hyperopt.hp.choice(
                     param.name, param.list
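To see what the new branches produce, here is a minimal, self-contained sketch (not part of the PR) that builds the hyperopt expressions for two parameters from the example Experiment, using the same mu/sigma derivation as the code above:

```python
import math

import hyperopt
from hyperopt.pyll import stochastic

# lr: double, min=0.01, max=0.05, distribution=normal.
# mu is the midpoint of [min, max]; sigma = (max - min) / 6, so that
# roughly 99.7% of samples (mu ± 3·sigma) fall inside the range.
mu = (0.01 + 0.05) / 2
sigma = (0.05 - 0.01) / 6

# batch_size: int, min=32, max=64, distribution=logNormal.
# Same derivation, but on the log-transformed bounds.
log_mu = (math.log(32) + math.log(64)) / 2
log_sigma = (math.log(64) - math.log(32)) / 6

space = {
    "lr": hyperopt.hp.normal("lr", mu, sigma),
    "batch_size": hyperopt.hp.lognormal("batch_size", log_mu, log_sigma),
}

# Draw one random candidate from the space.
print(stochastic.sample(space))
```

Here stochastic.sample just draws a single candidate to illustrate the distributions; the actual service drives these same expressions through hyperopt's suggest/Trials machinery.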
5 changes: 5 additions & 0 deletions pkg/suggestion/v1beta1/internal/constant.py
@@ -19,3 +19,8 @@
 DOUBLE = "DOUBLE"
 CATEGORICAL = "CATEGORICAL"
 DISCRETE = "DISCRETE"
+
+UNIFORM = "UNIFORM"
+LOG_UNIFORM = "LOG_UNIFORM"
+NORMAL = "NORMAL"
+LOG_NORMAL = "LOG_NORMAL"
39 changes: 26 additions & 13 deletions pkg/suggestion/v1beta1/internal/search_space.py
@@ -82,25 +82,36 @@ def __str__(self):

     @staticmethod
     def convert_parameter(p):
+        distribution = (
+            p.feasible_space.distribution
+            if p.feasible_space.distribution != ""
+            and p.feasible_space.distribution is not None
+            and p.feasible_space.distribution != api.DISTRIBUTION_UNSPECIFIED
+            else None
+        )
+
         if p.parameter_type == api.INT:
             # Default value for INT parameter step is 1
-            step = 1
-            if p.feasible_space.step is not None and p.feasible_space.step != "":
-                step = p.feasible_space.step
+            step = p.feasible_space.step if p.feasible_space.step else 1
             return HyperParameter.int(
-                p.name, p.feasible_space.min, p.feasible_space.max, step
+                p.name, p.feasible_space.min, p.feasible_space.max, step, distribution
             )

         elif p.parameter_type == api.DOUBLE:
             return HyperParameter.double(
                 p.name,
                 p.feasible_space.min,
                 p.feasible_space.max,
                 p.feasible_space.step,
+                distribution,
             )

         elif p.parameter_type == api.CATEGORICAL:
             return HyperParameter.categorical(p.name, p.feasible_space.list)

         elif p.parameter_type == api.DISCRETE:
             return HyperParameter.discrete(p.name, p.feasible_space.list)

         else:
             logger.error(
                 "Cannot get the type for the parameter: %s (%s)",
@@ -110,33 +121,35 @@ def convert_parameter(p):


 class HyperParameter(object):
-    def __init__(self, name, type_, min_, max_, list_, step):
+    def __init__(self, name, type_, min_, max_, list_, step, distribution=None):
         self.name = name
         self.type = type_
         self.min = min_
         self.max = max_
         self.list = list_
         self.step = step
+        self.distribution = distribution

     def __str__(self):
-        if self.type == constant.INTEGER or self.type == constant.DOUBLE:
+        if self.type in [constant.INTEGER, constant.DOUBLE]:
             return (
-                "HyperParameter(name: {}, type: {}, min: {}, max: {}, step: {})".format(
-                    self.name, self.type, self.min, self.max, self.step
-                )
+                f"HyperParameter(name: {self.name}, type: {self.type}, min: {self.min}, "
+                f"max: {self.max}, step: {self.step}, distribution: {self.distribution})"
             )
         else:
             return "HyperParameter(name: {}, type: {}, list: {})".format(
                 self.name, self.type, ", ".join(self.list)
             )

     @staticmethod
-    def int(name, min_, max_, step):
-        return HyperParameter(name, constant.INTEGER, min_, max_, [], step)
+    def int(name, min_, max_, step, distribution=None):
+        return HyperParameter(
+            name, constant.INTEGER, min_, max_, [], step, distribution
+        )

     @staticmethod
-    def double(name, min_, max_, step):
-        return HyperParameter(name, constant.DOUBLE, min_, max_, [], step)
+    def double(name, min_, max_, step, distribution=None):
+        return HyperParameter(name, constant.DOUBLE, min_, max_, [], step, distribution)

     @staticmethod
     def categorical(name, lst):
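A small usage sketch (not part of the diff) of the extended helper, assuming the in-repo import paths shown above:

```python
from pkg.apis.manager.v1beta1.python import api_pb2
from pkg.suggestion.v1beta1.internal.search_space import HyperParameter

# The distribution argument is the gRPC enum value (api_pb2.NORMAL == 3) and
# defaults to None, so pre-existing call sites that omit it keep working.
lr = HyperParameter.double("lr", "0.01", "0.05", "0.01", api_pb2.NORMAL)
print(lr)
# HyperParameter(name: lr, type: DOUBLE, min: 0.01, max: 0.05, step: 0.01, distribution: 3)
```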
2 changes: 1 addition & 1 deletion pkg/webhook/v1beta1/experiment/validator/validator.go
@@ -284,7 +284,7 @@ func (g *DefaultValidator) validateParameters(parameters []experimentsv1beta1.Pa
 			allErrs = append(allErrs, field.Invalid(parametersPath.Index(i).Child("feasibleSpace").Child("list"),
 				param.FeasibleSpace.List, fmt.Sprintf("feasibleSpace.list is not supported for parameterType: %v", param.ParameterType)))
 		}
-		if param.FeasibleSpace.Max == "" && param.FeasibleSpace.Min == "" {
+		if param.FeasibleSpace.Max == "" || param.FeasibleSpace.Min == "" {
 			allErrs = append(allErrs, field.Required(parametersPath.Index(i).Child("feasibleSpace").Child("max"),
 				fmt.Sprintf("feasibleSpace.max or feasibleSpace.min must be specified for parameterType: %v", param.ParameterType)))
 		}
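With this one-operator change, the webhook now rejects a feasible space where either min or max is missing (previously it only complained when both were empty), closing the gap discussed in the base_service.py thread where a logNormal parameter with only min set produced an Experiment whose trials never ran.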