Merge branch 'master' into fix/grpc-bug

kubeflow · Aug 22, 2024 · 239a55f · 239a55f
2 parents a0902cb + 0e2ba6e
commit 239a55f
Show file tree

Hide file tree

Showing 85 changed files with 3,182 additions and 1,917 deletions.
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,4 @@
+[flake8]
+max-line-length = 100
+# E203 is ignored because the rule is not PEP 8 compliant and conflicts with Black's formatting
+extend-ignore = W503, E203
diff --git a/.github/workflows/e2e-test-tune-api.yaml b/.github/workflows/e2e-test-tune-api.yaml
@@ -0,0 +1,34 @@
+name: E2E Test with tune API
+
+on:
+  pull_request:
+    paths-ignore:
+      - "pkg/ui/v1beta1/frontend/**"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  e2e:
+    runs-on: ubuntu-22.04
+    timeout-minutes: 120
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Test Env
+        uses: ./.github/workflows/template-setup-e2e-test
+        with:
+          kubernetes-version: ${{ matrix.kubernetes-version }}
+
+      - name: Run e2e test with tune API
+        uses: ./.github/workflows/template-e2e-test
+        with:
+          tune-api: true
+
+    strategy:
+      fail-fast: false
+      matrix:
+        # Detail: https://hub.docker.com/r/kindest/node
+        kubernetes-version: ["v1.27.11", "v1.28.7", "v1.29.2"]
diff --git a/.github/workflows/template-e2e-test/action.yaml b/.github/workflows/template-e2e-test/action.yaml
@@ -4,15 +4,17 @@ description: Run e2e test using the minikube cluster
 
 inputs:
   experiments:
-    required: true
+    required: false
     description: comma delimited experiment name
+    default: ""
   training-operator:
     required: false
     description: whether to deploy training-operator or not
     default: false
   trial-images:
-    required: true
+    required: false
     description: comma delimited trial image name
+    default: ""
   katib-ui:
     required: true
     description: whether to deploy katib-ui or not
@@ -21,18 +23,27 @@ inputs:
     required: false
     description: mysql or postgres
     default: mysql
+  tune-api:
+    required: true
+    description: whether to execute tune-api test or not
+    default: false
 
 runs:
   using: composite
   steps:
     - name: Setup Minikube Cluster
       shell: bash
-      run: ./test/e2e/v1beta1/scripts/gh-actions/setup-minikube.sh ${{ inputs.katib-ui }} ${{ inputs.trial-images }} ${{ inputs.experiments }}
+      run: ./test/e2e/v1beta1/scripts/gh-actions/setup-minikube.sh ${{ inputs.katib-ui }} ${{ inputs.tune-api }} ${{ inputs.trial-images }} ${{ inputs.experiments }}
 
     - name: Setup Katib
       shell: bash
       run: ./test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh ${{ inputs.katib-ui }} ${{ inputs.training-operator }} ${{ inputs.database-type }}
 
     - name: Run E2E Experiment
       shell: bash
-      run: ./test/e2e/v1beta1/scripts/gh-actions/run-e2e-experiment.sh ${{ inputs.experiments }}
+      run: |
+        if "${{ inputs.tune-api }}"; then
+          ./test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.sh
+        else
+          ./test/e2e/v1beta1/scripts/gh-actions/run-e2e-experiment.sh ${{ inputs.experiments }}
+        fi
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -10,7 +10,17 @@ repos:
     hooks:
       - id: isort
         name: isort
-        entry: isort --profile google
+        entry: isort --profile black
+  - repo: https://github.com/psf/black
+    rev: 24.2.0
+    hooks:
+      - id: black
+        files: (sdk|examples|pkg)/.*
+  - repo: https://github.com/pycqa/flake8
+    rev: 7.1.1
+    hooks:
+      - id: flake8
+        files: (sdk|examples|pkg)/.*
 exclude: |
   (?x)^(
     .*zz_generated.deepcopy.*|

diff --git a/cmd/earlystopping/medianstop/v1beta1/main.py b/cmd/earlystopping/medianstop/v1beta1/main.py
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from concurrent import futures
 import logging
 import time
+from concurrent import futures
 
 import grpc
 

diff --git a/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py b/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py
@@ -13,9 +13,7 @@
 # limitations under the License.
 
 import argparse
-from logging import getLogger
-from logging import INFO
-from logging import StreamHandler
+from logging import INFO, StreamHandler, getLogger
 
 import api_pb2
 import api_pb2_grpc

diff --git a/cmd/suggestion/hyperband/v1beta1/main.py b/cmd/suggestion/hyperband/v1beta1/main.py
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from concurrent import futures
 import time
+from concurrent import futures
 
 import grpc
 

diff --git a/cmd/suggestion/hyperopt/v1beta1/main.py b/cmd/suggestion/hyperopt/v1beta1/main.py
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from concurrent import futures
 import time
+from concurrent import futures
 
 import grpc
 

diff --git a/cmd/suggestion/nas/darts/v1beta1/main.py b/cmd/suggestion/nas/darts/v1beta1/main.py
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from concurrent import futures
 import time
+from concurrent import futures
 
 import grpc
 

diff --git a/cmd/suggestion/nas/enas/v1beta1/main.py b/cmd/suggestion/nas/enas/v1beta1/main.py
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from concurrent import futures
 import time
+from concurrent import futures
 
 import grpc
 

diff --git a/cmd/suggestion/optuna/v1beta1/main.py b/cmd/suggestion/optuna/v1beta1/main.py
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from concurrent import futures
 import time
+from concurrent import futures
 
 import grpc
 

diff --git a/cmd/suggestion/pbt/v1beta1/main.py b/cmd/suggestion/pbt/v1beta1/main.py
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from concurrent import futures
 import time
+from concurrent import futures
 
 import grpc
 

diff --git a/cmd/suggestion/skopt/v1beta1/main.py b/cmd/suggestion/skopt/v1beta1/main.py
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from concurrent import futures
 import time
+from concurrent import futures
 
 import grpc
 

diff --git a/docs/proposals/parameter-distribution.md b/docs/proposals/parameter-distribution.md
@@ -0,0 +1,169 @@
+# Proposal for Supporting various parameter distributions in Katib
+
+## Summary
+The goal of this project is to enhance the existing Katib Experiment APIs to support various parameter distributions such as uniform, log-uniform, and qlog-uniform. The suggestion services will then be extended to configure these distributions for the search space using the libraries provided by each framework.
+
+## Motivation
+Currently, [Katib](https://github.com/kubeflow/katib) is limited to supporting only uniform distribution for integer, float, and categorical hyperparameters. By introducing additional distributions, Katib will become more flexible and powerful in conducting hyperparameter optimization tasks.
+
+A Data Scientist requires Katib to support multiple hyperparameter distributions, such as log-uniform, normal, and log-normal, in addition to the existing uniform distribution. This enhancement is crucial for more flexible and precise hyperparameter optimization. For instance, learning rates often benefit from a log-uniform distribution because small values can significantly impact performance. Similarly, normal distributions are useful for parameters that are expected to vary around a central value.
+
+### Goals
+- Add `Distribution` field to `FeasibleSpace` alongside `ParameterType`.
+- Support the log-uniform, normal, and log-normal distributions.
+- Update the Experiment and gRPC API to support `Distribution`.
+- Update logic to handle the new parameter distributions for each suggestion service (e.g., Optuna, Hyperopt).
+- Extend the Python SDK to support the new `Distribution` field.
+### Non-Goals
+- This proposal does not aim to create a new version of the CRD APIs.
+- This proposal does not aim to make the necessary Katib UI changes.
+- No changes will be made to the core optimization algorithms beyond supporting new distributions.
+
+## Proposal
+
+### Parameter Distribution Comparison Table
+
+| Distribution Type             | Hyperopt              | Optuna                                          | Ray Tune              | Nevergrad                                    |
+|-------------------------------|-----------------------|-------------------------------------------------|-----------------------|---------------------------------------------|
+| **Uniform Continuous**        | `hp.uniform`          | `FloatDistribution`                             | `tune.uniform`        | `p.Scalar` with uniform transformation      |
+| **Quantized Uniform**         | `hp.quniform`         | `DiscreteUniformDistribution` (deprecated)      | `tune.quniform`       | `p.Scalar` with uniform and step specified  |
+| **Log Uniform**               | `hp.loguniform`       | `LogUniformDistribution` (deprecated)           | `tune.loguniform`     | `p.Log` with uniform transformation         |
+| **Uniform Integer**           | `hp.randint` or quantized distributions with step size `q` set to 1 | `IntDistribution`                    | `tune.randint`        | `p.Scalar` with integer transformation     |
+| **Categorical**               | `hp.choice`           | `CategoricalDistribution`                       | `tune.choice`         | `p.Choice`                                  |
+| **Quantized Log Uniform**     | `hp.qloguniform`      | Custom Implementation                           | `tune.qloguniform`    | `p.Log` with uniform and step specified    |
+| **Normal**                    | `hp.normal`           | (Not directly supported)                        | `tune.randn`          | (Not directly supported)                    |
+| **Quantized Normal**          | `hp.qnormal`          | (Not directly supported)                        | `tune.qrandn`         | (Not directly supported)                    |
+| **Log Normal**                | `hp.lognormal`        | (Not directly supported)                        | (Use custom transformation in `tune.randn`) | (Not directly supported)                    |
+| **Quantized Log Normal**      | `hp.qlognormal`       | (Not directly supported)                        | (Use custom transformation in `tune.qrandn`) | (Not directly supported)                    |
+| **Quantized Integer**         | `hp.quniformint`      | `IntUniformDistribution` (deprecated)           |                       | `p.Scalar` with integer and step specified  |
+| **Log Integer**               |                       | `IntLogUniformDistribution` (deprecated)        | `tune.lograndint`     | `p.Scalar` with log-integer transformation |
+
+
+- Note:
+In `Nevergrad`, parameter types like `p.Scalar`, `p.Log`, and `p.Choice` are mapped to corresponding `Hyperopt` search space definitions like `hp.uniform`, `hp.loguniform`, and `hp.choice` using internal functions to convert parameter bounds and distributions.
+
+## API Design
+### FeasibleSpace
+Feasible space for optimization.
+Int and Double type use Max/Min.
+Discrete and Categorical type use List.
+
+
+| Field | Type | Label | Description |
+| ----- | ---- | ----- | ----------- |
+| max | [string](#string) |  | Max Value |
+| min | [string](#string) |  | Minimum Value |
+| list | [string](#string) | repeated | List of Values. |
+| step | [string](#string) |  | Step for double or int parameter, or `q` for quantization. |
+| distribution | [Distribution](#api-v1-beta1-Distribution) |  | Type of the Distribution. |
+
+
+<a name="api-v1-beta1-Distribution"></a>
+
+### Distribution
+- Types of values for hyperparameter distributions.
+- We add a separate `distribution` field to describe the hyperparameter search space instead of extending [`ParameterType`](https://github.com/kubeflow/katib/blob/2c575227586ff1c03cf6b5190d066e2f3061a404/pkg/apis/controller/experiments/v1beta1/experiment_types.go#L199-L207).
+- The `distribution` allows users to configure more granular search space customizations.
+- In this enhancement, we would propose the following 4 distributions:
+
+| Name | Number | Description |
+| ---- | ------ | ----------- |
+| UNIFORM | 0 | Continuous uniform distribution. Samples values evenly between a minimum and maximum value. Use "Max/Min". Use "Step" for `q`. |
+| LOG_UNIFORM | 1 | Samples values such that their logarithm is uniformly distributed. Use "Max/Min". Use "Step" for `q`. |
+| NORMAL | 2 | Normal (Gaussian) distribution type. Samples values according to a normal distribution characterized by a mean and standard deviation. Use "Max/Min". Use "Step" for `q`. |
+| LOG_NORMAL | 3 | Log-normal distribution type. Samples values such that their logarithm is normally distributed. Use "Max/Min". Use "Step" for `q`. |
+
+
+## Experiment API changes
+Scope: `pkg/apis/controller/experiments/v1beta1/experiment_types.go`
+
+```go
+type ParameterSpec struct {
+	Name          string        `json:"name,omitempty"`
+	ParameterType ParameterType `json:"parameterType,omitempty"`
+	FeasibleSpace FeasibleSpace `json:"feasibleSpace,omitempty"`
+}
+```
+- Adding new field `Distribution` to `FeasibleSpace`
+
+- The `Step` field can be used to define quantization steps for uniform or log-uniform distributions, effectively covering q-quantization requirements.
+
+Updated `FeasibleSpace` struct
+```diff
+type FeasibleSpace struct {
+	Max           string        `json:"max,omitempty"`
+	Min           string        `json:"min,omitempty"`
+	List          []string      `json:"list,omitempty"`
+	Step          string        `json:"step,omitempty"` // Step can be used to define q-quantization
++       Distribution  Distribution  `json:"distribution,omitempty"` // Added Distribution field
+}
+```
+ - New Field Description: `Distribution`
+  - Type: `Distribution`
+  - Description: The Distribution field specifies the type of statistical distribution to be applied to the parameter. This allows the definition of various distributions, such as uniform, log-uniform, or other supported types.
+
+- Defining `Distribution` type
+```go
+type Distribution string
+
+const (
+	DistributionUniform    Distribution = "uniform"
+	DistributionLogUniform Distribution = "logUniform"
+	DistributionNormal     Distribution = "normal"
+	DistributionLogNormal  Distribution = "logNormal"
+)
+```
+
+## gRPC API changes
+Scope: `pkg/apis/manager/v1beta1/api.proto`
+- Add the `Distribution` field to the `FeasibleSpace` message
+```diff
+/**
+ * Feasible space for optimization.
+ * Int and Double type use Max/Min.
+ * Discrete and Categorical type use List.
+ */
+message FeasibleSpace {
+    string max = 1; /// Max Value
+    string min = 2; /// Minimum Value
+    repeated string list = 3; /// List of Values.
+    string step = 4; /// Step for double or int parameter
++   Distribution distribution = 5; // Distribution of the parameter.
+}
+```
+- Define the `Distribution` enum
+```protobuf
+/**
+ * Distribution types for HyperParameter.
+ */
+enum Distribution {
+    UNIFORM = 0;
+    LOG_UNIFORM = 1;
+    NORMAL = 2;
+    LOG_NORMAL = 3;
+}
+```
+
+## Suggestion Service Logic
+- For each suggestion service (e.g., Optuna, Hyperopt), the logic will be updated to handle the new parameter distributions.
+- This involves modifying the conversion functions to map Katib distributions to the corresponding framework-specific distributions.
+
+#### Optuna
+ref: https://optuna.readthedocs.io/en/stable/reference/distributions.html
+
+For example:
+- Update the `_get_optuna_search_space` for new Distributions.
+scope: `pkg/suggestion/v1beta1/optuna/base_service.py`
+
+#### Goptuna
+ref: https://github.com/c-bata/goptuna/blob/2245ddd9e8d1edba750839893c8a618f852bc1cf/distribution.go
+
+#### Hyperopt
+ref: http://hyperopt.github.io/hyperopt/getting-started/search_spaces/#parameter-expressions
+
+#### Ray-tune
+ref: https://docs.ray.io/en/latest/tune/api/search_space.html
+
+## Python SDK
+Extend the Python SDK to support the new `Distribution` field.
+