[GSOC] hyperopt suggestion service logic update #2412

Open · wants to merge 12 commits into base: master
2 changes: 2 additions & 0 deletions .github/workflows/e2e-test-pytorch-mnist.yaml
@@ -41,5 +41,7 @@ jobs:
           - "long-running-resume,from-volume-resume,median-stop"
           # others
           - "grid,bayesian-optimization,tpe,multivariate-tpe,cma-es,hyperband"
+          - "hyperopt-distribution"
           - "file-metrics-collector,pytorchjob-mnist"
           - "median-stop-with-json-format,file-metrics-collector-with-json-format"

81 changes: 81 additions & 0 deletions examples/v1beta1/hp-tuning/hyperopt-distribution.yaml
@@ -0,0 +1,81 @@
---
apiVersion: kubeflow.org/v1beta1
kind: Experiment
metadata:
  namespace: kubeflow
  name: hyperopt-distribution
spec:
  objective:
    type: minimize
    goal: 0.2
    objectiveMetricName: loss
  algorithm:
    algorithmName: random
  parallelTrialCount: 3
  maxTrialCount: 12
  maxFailedTrialCount: 3
  parameters:
    - name: lr
      parameterType: double
      feasibleSpace:
        min: "0.01"
        max: "0.05"
        step: "0.01"
        distribution: "uniform"
    - name: momentum
      parameterType: double
      feasibleSpace:
        min: "0.5"
        max: "0.9"
        distribution: "logUniform"
Member:
Please add the other distributions in this example so we make sure we validate them as well:

normal
logNormal

Contributor Author:
done
    - name: weight_decay
      parameterType: double
      feasibleSpace:
        min: "0.01"
        max: "0.05"
        distribution: "normal"
    - name: dropout_rate
      parameterType: double
      feasibleSpace:
        min: "0.1"
        max: "0.5"
        step: "0.01"
        distribution: "logNormal"
  trialTemplate:
    primaryContainerName: training-container
    trialParameters:
      - name: learningRate
        description: Learning rate for the training model
        reference: lr
      - name: momentum
        description: Momentum for the training model
        reference: momentum
      - name: weightDecay
        description: Weight decay for the training model
        reference: weight_decay
      - name: dropoutRate
        description: Dropout rate for the training model
        reference: dropout_rate
    trialSpec:
      apiVersion: batch/v1
      kind: Job
      spec:
        template:
          spec:
            containers:
              - name: training-container
                image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
                command:
                  - "python3"
                  - "/opt/pytorch-mnist/mnist.py"
                  - "--epochs=1"
                  - "--batch-size=16"
                  - "--lr=${trialParameters.learningRate}"
                  - "--momentum=${trialParameters.momentum}"
                  - "--weight-decay=${trialParameters.weightDecay}"
                  - "--dropout-rate=${trialParameters.dropoutRate}"
                resources:
                  limits:
                    memory: "1Gi"
                    cpu: "0.5"
            restartPolicy: Never
14 changes: 14 additions & 0 deletions examples/v1beta1/trial-images/pytorch-mnist/mnist.py
@@ -150,6 +150,20 @@ def main():
         metavar="M",
         help="SGD momentum (default: 0.5)",
     )
+    parser.add_argument(
+        "--weight-decay",
+        type=float,
+        default=0.01,
+        metavar="WD",
+        help="Weight decay for regularization (default: 0.01)",
+    )
+    parser.add_argument(
+        "--dropout-rate",
+        type=float,
+        default=0.5,
+        metavar="DR",
+        help="Dropout rate for the model (default: 0.5)",
+    )
     parser.add_argument(
         "--no-cuda", action="store_true", default=False, help="disables CUDA training"
     )
180 changes: 90 additions & 90 deletions pkg/apis/manager/v1beta1/api.pb.go

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions pkg/apis/manager/v1beta1/api.proto
@@ -101,11 +101,11 @@ enum ParameterType {
  * Distribution types for HyperParameter.
  */
 enum Distribution {
-  UNIFORM = 0;
-  LOG_UNIFORM = 1;
-  NORMAL = 2;
-  LOG_NORMAL = 3;
-  DISTRIBUTION_UNKNOWN = 4;
+  DISTRIBUTION_UNSPECIFIED = 0;
+  UNIFORM = 1;
+  LOG_UNIFORM = 2;
+  NORMAL = 3;
+  LOG_NORMAL = 4;
 }

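For context on the renumbering: proto3 reads an unset enum field as the enum's zero value, so reserving 0 for DISTRIBUTION_UNSPECIFIED (the proto3 style-guide convention) keeps "no distribution set" distinguishable from a deliberate UNIFORM choice. A minimal sketch of this behavior, assuming the regenerated Python stubs from this PR are importable:

```python
from pkg.apis.manager.v1beta1.python import api_pb2

# A FeasibleSpace built without an explicit distribution now reports the
# UNSPECIFIED sentinel instead of silently reading as UNIFORM (the old 0 value).
fs = api_pb2.FeasibleSpace(min="0.01", max="0.05")
assert fs.distribution == api_pb2.DISTRIBUTION_UNSPECIFIED
assert fs.distribution != api_pb2.UNIFORM
```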
24 changes: 12 additions & 12 deletions pkg/apis/manager/v1beta1/python/api_pb2.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pkg/apis/manager/v1beta1/python/api_pb2.pyi
@@ -16,11 +16,11 @@ class ParameterType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):

 class Distribution(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
     __slots__ = ()
+    DISTRIBUTION_UNSPECIFIED: _ClassVar[Distribution]
     UNIFORM: _ClassVar[Distribution]
     LOG_UNIFORM: _ClassVar[Distribution]
     NORMAL: _ClassVar[Distribution]
     LOG_NORMAL: _ClassVar[Distribution]
-    DISTRIBUTION_UNKNOWN: _ClassVar[Distribution]

 class ObjectiveType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
     __slots__ = ()
@@ -39,11 +39,11 @@ DOUBLE: ParameterType
 INT: ParameterType
 DISCRETE: ParameterType
 CATEGORICAL: ParameterType
+DISTRIBUTION_UNSPECIFIED: Distribution
 UNIFORM: Distribution
 LOG_UNIFORM: Distribution
 NORMAL: Distribution
 LOG_NORMAL: Distribution
-DISTRIBUTION_UNKNOWN: Distribution
 UNKNOWN: ObjectiveType
 MINIMIZE: ObjectiveType
 MAXIMIZE: ObjectiveType
@@ -532,22 +532,12 @@ func convertParameterType(typ experimentsv1beta1.ParameterType) suggestionapi.ParameterType {
 }

 func convertFeasibleSpace(fs experimentsv1beta1.FeasibleSpace) *suggestionapi.FeasibleSpace {
-	distribution := convertDistribution(fs.Distribution)
-	if distribution == suggestionapi.Distribution_DISTRIBUTION_UNKNOWN {
-		return &suggestionapi.FeasibleSpace{
-			Max:  fs.Max,
-			Min:  fs.Min,
-			List: fs.List,
-			Step: fs.Step,
-		}
-	}
-
 	return &suggestionapi.FeasibleSpace{
 		Max:          fs.Max,
 		Min:          fs.Min,
 		List:         fs.List,
 		Step:         fs.Step,
-		Distribution: distribution,
+		Distribution: convertDistribution(fs.Distribution),
 	}
 }

@@ -562,7 +552,7 @@ func convertDistribution(typ experimentsv1beta1.Distribution) suggestionapi.Distribution {
 	case experimentsv1beta1.DistributionLogNormal:
 		return suggestionapi.Distribution_LOG_NORMAL
 	default:
-		return suggestionapi.Distribution_DISTRIBUTION_UNKNOWN
+		return suggestionapi.Distribution_DISTRIBUTION_UNSPECIFIED
 	}
 }

@@ -618,7 +618,7 @@ func TestConvertDistribution(t *testing.T) {
 		},
 		{
 			inDistribution:       experimentsv1beta1.DistributionUnknown,
-			expectedDistribution: suggestionapi.Distribution_DISTRIBUTION_UNKNOWN,
+			expectedDistribution: suggestionapi.Distribution_DISTRIBUTION_UNSPECIFIED,
 			testDescription:      "Convert unknown distribution",
 		},
 	}
64 changes: 59 additions & 5 deletions pkg/suggestion/v1beta1/hyperopt/base_service.py
@@ -17,6 +17,7 @@
 import hyperopt
 import numpy as np

+from pkg.apis.manager.v1beta1.python import api_pb2
 from pkg.suggestion.v1beta1.internal.constant import (
     CATEGORICAL,
     DISCRETE,
@@ -63,13 +64,66 @@ def create_hyperopt_domain(self):
         hyperopt_search_space = {}
         for param in self.search_space.params:
             if param.type == INTEGER:
-                hyperopt_search_space[param.name] = hyperopt.hp.quniform(
-                    param.name, float(param.min), float(param.max), float(param.step)
-                )
-            elif param.type == DOUBLE:
-                hyperopt_search_space[param.name] = hyperopt.hp.uniform(
+                hyperopt_search_space[param.name] = hyperopt.hp.uniformint(
Member:
If the parameter is an int, why can't we support other distributions like lognormal?

Contributor Author (shashank-iitbhu, Sep 6, 2024):
Distributions like uniform, quniform, loguniform, and normal return float values. They are designed to sample from a range that can take any real number, which might not make sense when we are looking for an integer value. Although we could definitely add support for these distributions for int parameters as well. Should we do that?

Member:
@tenzen-y @kubeflow/wg-training-leads @shashank-iitbhu Should we round the float value to an int if the user wants to use these distributions with the int parameter type?

Member:
> Should we round the float value to an int if the user wants to use these distributions with the int parameter type?

SGTM. Users can specify the double parameter type if they want to compute more exactly, but documenting this restriction for the int parameter type would be better.

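A minimal sketch of the rounding idea under discussion, using hyperopt's pyll scope.int wrapper; this is illustrative only, not part of the PR, and the "num_units" parameter name is made up:

```python
import hyperopt
from hyperopt.pyll import scope
from hyperopt.pyll.stochastic import sample

# qloguniform samples floats (e.g. 54.0); scope.int casts each sample to int,
# so an INT parameter could still draw from a log-uniform-style distribution.
space = {"num_units": scope.int(hyperopt.hp.qloguniform("num_units", 2, 6, 1))}

print(sample(space))  # e.g. {'num_units': 54}
```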
                     param.name, float(param.min), float(param.max)
                 )
+            elif param.type == DOUBLE:
+                if param.distribution == api_pb2.UNIFORM or param.distribution is None:
+                    if param.step:
+                        hyperopt_search_space[param.name] = hyperopt.hp.quniform(
+                            param.name,
+                            float(param.min),
+                            float(param.max),
+                            float(param.step),
+                        )
+                    else:
+                        hyperopt_search_space[param.name] = hyperopt.hp.uniform(
+                            param.name, float(param.min), float(param.max)
+                        )
+                elif param.distribution == api_pb2.LOG_UNIFORM:
+                    if param.step:
+                        hyperopt_search_space[param.name] = hyperopt.hp.qloguniform(
+                            param.name,
+                            float(param.min),
+                            float(param.max),
+                            float(param.step),
+                        )
+                    else:
+                        hyperopt_search_space[param.name] = hyperopt.hp.loguniform(
+                            param.name, float(param.min), float(param.max)
+                        )
+                elif param.distribution == api_pb2.NORMAL:
+                    mu = (float(param.min) + float(param.max)) / 2
+                    sigma = (float(param.max) - float(param.min)) / 6
Contributor Author (shashank-iitbhu, Sep 11, 2024):
I followed this article to determine the value of sigma from min and max.
cc @tenzen-y @andreyvelich

Member:
Suggested change:
-                    sigma = (float(param.max) - float(param.min)) / 6
+                    # We consider the normal distribution based on the range of ±3 sigma.
+                    sigma = (float(param.max) - float(param.min)) / 6

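For reference, the heuristic in the suggestion above maps [min, max] to the ±3-sigma band of a normal distribution, which covers about 99.7% of the probability mass. A quick numeric check (illustrative, reusing the weight_decay range from the example Experiment):

```python
import numpy as np

lo, hi = 0.01, 0.05   # weight_decay feasible space from the example
mu = (lo + hi) / 2    # center of the range
sigma = (hi - lo) / 6 # so the range spans mu ± 3*sigma

samples = np.random.default_rng(0).normal(mu, sigma, 100_000)
print(np.mean((samples >= lo) & (samples <= hi)))  # ~0.997
```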
+                    if param.step:
+                        hyperopt_search_space[param.name] = hyperopt.hp.qnormal(
+                            param.name,
+                            mu,
+                            sigma,
+                            float(param.step),
+                        )
+                    else:
+                        hyperopt_search_space[param.name] = hyperopt.hp.normal(
+                            param.name,
+                            mu,
+                            sigma,
+                        )
+                elif param.distribution == api_pb2.LOG_NORMAL:
+                    mu = (float(param.min) + float(param.max)) / 2
+                    sigma = (float(param.max) - float(param.min)) / 6
+                    if param.step:
+                        hyperopt_search_space[param.name] = hyperopt.hp.qlognormal(
+                            param.name,
+                            mu,
+                            sigma,
+                            float(param.step),
+                        )
+                    else:
+                        hyperopt_search_space[param.name] = hyperopt.hp.lognormal(
+                            param.name,
+                            mu,
+                            sigma,
+                        )
             elif param.type == CATEGORICAL or param.type == DISCRETE:
                 hyperopt_search_space[param.name] = hyperopt.hp.choice(
                     param.name, param.list
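To see what the new DOUBLE branches translate to, here is a stand-alone sketch that samples the equivalent hyperopt expressions directly. It mirrors the mapping above but is not Katib code; note that hyperopt's loguniform treats its arguments as bounds on log(x), so the sampled values are exp-transformed:

```python
import hyperopt
from hyperopt.pyll.stochastic import sample

lo, hi, step = 0.01, 0.05, 0.01
mu, sigma = (lo + hi) / 2, (hi - lo) / 6

space = {
    # distribution: uniform with a step -> quniform
    "lr": hyperopt.hp.quniform("lr", lo, hi, step),
    # distribution: normal without a step -> normal(mu, sigma)
    "weight_decay": hyperopt.hp.normal("weight_decay", mu, sigma),
    # distribution: logUniform without a step -> loguniform; hyperopt
    # exponentiates, so samples lie in [exp(lo), exp(hi)], not [lo, hi].
    "momentum": hyperopt.hp.loguniform("momentum", lo, hi),
}
print(sample(space))
```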
5 changes: 5 additions & 0 deletions pkg/suggestion/v1beta1/internal/constant.py
@@ -19,3 +19,8 @@
 DOUBLE = "DOUBLE"
 CATEGORICAL = "CATEGORICAL"
 DISCRETE = "DISCRETE"
+
+UNIFORM = "UNIFORM"
+LOG_UNIFORM = "LOG_UNIFORM"
+NORMAL = "NORMAL"
+LOG_NORMAL = "LOG_NORMAL"
39 changes: 26 additions & 13 deletions pkg/suggestion/v1beta1/internal/search_space.py
@@ -82,25 +82,36 @@ def __str__(self):

     @staticmethod
     def convert_parameter(p):
+        distribution = (
+            p.feasible_space.distribution
+            if p.feasible_space.distribution != ""
+            and p.feasible_space.distribution is not None
+            and p.feasible_space.distribution != api.DISTRIBUTION_UNSPECIFIED
+            else None
+        )
+
         if p.parameter_type == api.INT:
             # Default value for INT parameter step is 1
-            step = 1
-            if p.feasible_space.step is not None and p.feasible_space.step != "":
-                step = p.feasible_space.step
+            step = p.feasible_space.step if p.feasible_space.step else 1
             return HyperParameter.int(
-                p.name, p.feasible_space.min, p.feasible_space.max, step
+                p.name, p.feasible_space.min, p.feasible_space.max, step, distribution
             )
+
         elif p.parameter_type == api.DOUBLE:
             return HyperParameter.double(
                 p.name,
                 p.feasible_space.min,
                 p.feasible_space.max,
                 p.feasible_space.step,
+                distribution,
             )
+
         elif p.parameter_type == api.CATEGORICAL:
             return HyperParameter.categorical(p.name, p.feasible_space.list)
+
         elif p.parameter_type == api.DISCRETE:
             return HyperParameter.discrete(p.name, p.feasible_space.list)
+
         else:
             logger.error(
                 "Cannot get the type for the parameter: %s (%s)",
@@ -110,33 +121,35 @@ def convert_parameter(p):


 class HyperParameter(object):
-    def __init__(self, name, type_, min_, max_, list_, step):
+    def __init__(self, name, type_, min_, max_, list_, step, distribution=None):
         self.name = name
         self.type = type_
         self.min = min_
         self.max = max_
         self.list = list_
         self.step = step
+        self.distribution = distribution

     def __str__(self):
-        if self.type == constant.INTEGER or self.type == constant.DOUBLE:
+        if self.type in [constant.INTEGER, constant.DOUBLE]:
             return (
-                "HyperParameter(name: {}, type: {}, min: {}, max: {}, step: {})".format(
-                    self.name, self.type, self.min, self.max, self.step
-                )
+                f"HyperParameter(name: {self.name}, type: {self.type}, min: {self.min}, "
+                f"max: {self.max}, step: {self.step}, distribution: {self.distribution})"
             )
         else:
             return "HyperParameter(name: {}, type: {}, list: {})".format(
                 self.name, self.type, ", ".join(self.list)
             )

     @staticmethod
-    def int(name, min_, max_, step):
-        return HyperParameter(name, constant.INTEGER, min_, max_, [], step)
+    def int(name, min_, max_, step, distribution=None):
+        return HyperParameter(
+            name, constant.INTEGER, min_, max_, [], step, distribution
+        )

     @staticmethod
-    def double(name, min_, max_, step):
-        return HyperParameter(name, constant.DOUBLE, min_, max_, [], step)
+    def double(name, min_, max_, step, distribution=None):
+        return HyperParameter(name, constant.DOUBLE, min_, max_, [], step, distribution)

     @staticmethod
     def categorical(name, lst):
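A hypothetical end-to-end check of the new plumbing, assuming this PR's regenerated stubs are importable and that convert_parameter is a staticmethod on the module's HyperParameterSearchSpace class (class name not shown in this diff, so treat it as an assumption):

```python
from pkg.apis.manager.v1beta1.python import api_pb2
from pkg.suggestion.v1beta1.internal.search_space import HyperParameterSearchSpace

spec = api_pb2.ParameterSpec(
    name="lr",
    parameter_type=api_pb2.DOUBLE,
    feasible_space=api_pb2.FeasibleSpace(
        min="0.01", max="0.05", distribution=api_pb2.LOG_UNIFORM
    ),
)

# The converted HyperParameter now carries the distribution through to the
# hyperopt base service instead of dropping it.
param = HyperParameterSearchSpace.convert_parameter(spec)
print(param)
```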