Skip to content

Commit

Permalink
Sanity integration tests and Ray cluster test
Browse files Browse the repository at this point in the history
Signed-off-by: Karel Suta <[email protected]>
  • Loading branch information
sutaakar authored and astefanutti committed Jul 25, 2023
1 parent 05695aa commit a8e4423
Show file tree
Hide file tree
Showing 8 changed files with 1,088 additions and 2 deletions.
21 changes: 19 additions & 2 deletions tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ oc apply -f https://raw.githubusercontent.com/opendatahub-io/odh-manifests/maste
oc apply -f https://raw.githubusercontent.com/opendatahub-io/distributed-workloads/main/codeflare-stack-kfdef.yaml
```

## Setup
## Bash tests - Setup

Clone the [opendatahub-io/peak](https://github.com/opendatahub-io/peak) project anywhere you like in your working environment. But, do not clone it into the `distributed-workloads` directory.

Expand Down Expand Up @@ -66,7 +66,7 @@ Now we can setup our tests.

This should create a directory, `distributed-workloads` in the `operator-tests` directory of the peak repo.

## Running Tests
## Bash tests - Running Tests

`run.sh` will search the `operator-tests` directory for the `*.sh` file whose name we pass to it as an argument. In this case, we want to run the `distributed-workloads.sh` script.

Expand Down Expand Up @@ -115,6 +115,23 @@ In some cases, your cluster may not have the default user+password(admin, admin)
OPENSHIFT_TESTUSER_NAME=<user_name> OPENSHIFT_TESTUSER_PASS=<password> ./run.sh distributed-workloads.sh
```

## Go tests - Setup

* Install Go 1.20 or newer

## Go tests - Environment variables

* `ODH_NAMESPACE` - Namespace where ODH is installed
* `CODEFLARE_TEST_OUTPUT_DIR` - Output directory for test logs

## Go tests - Running Tests

Execute tests like standard Go unit tests.

```bash
go test -v ./integration
```

## Troubleshooting

If any of the above is unclear or you run into any problems, please open an issue in the [opendatahub-io/distributed-workloads](https://github.com/opendatahub-io/distributed-workloads/issues) repository.
54 changes: 54 additions & 0 deletions tests/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
module github.com/opendatahub-io/distributed-workloads/tests/new-tests

require (
github.com/onsi/gomega v1.27.6
github.com/project-codeflare/codeflare-operator v0.0.7-0.20230724151606-c885e0aeaba8
github.com/ray-project/kuberay/ray-operator v0.0.0-20230614221720-085c29d40fa9
k8s.io/api v0.26.3
k8s.io/apimachinery v0.26.3
)

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emicklei/go-restful/v3 v3.9.0 // indirect
github.com/go-logr/logr v1.2.4 // indirect
github.com/go-openapi/jsonpointer v0.19.5 // indirect
github.com/go-openapi/jsonreference v0.20.0 // indirect
github.com/go-openapi/swag v0.19.14 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/google/gnostic v0.5.7-v3refs // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/gofuzz v1.1.0 // indirect
github.com/imdario/mergo v0.3.12 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.7.6 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/project-codeflare/multi-cluster-app-dispatcher v1.32.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
golang.org/x/net v0.8.0 // indirect
golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b // indirect
golang.org/x/sys v0.6.0 // indirect
golang.org/x/term v0.6.0 // indirect
golang.org/x/text v0.8.0 // indirect
golang.org/x/time v0.3.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.28.1 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/apiextensions-apiserver v0.26.1 // indirect
k8s.io/client-go v0.26.3 // indirect
k8s.io/klog/v2 v2.80.1 // indirect
k8s.io/kube-openapi v0.0.0-20221012153701-172d655c2280 // indirect
k8s.io/utils v0.0.0-20221128185143-99ec85e7a448 // indirect
sigs.k8s.io/controller-runtime v0.14.6 // indirect
sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
sigs.k8s.io/yaml v1.3.0 // indirect
)

go 1.20
494 changes: 494 additions & 0 deletions tests/go.sum

Large diffs are not rendered by default.

37 changes: 37 additions & 0 deletions tests/integration/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
Copyright 2023.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package integration

import (
"os"
)

const (
	// odhNamespaceEnvVar is the name of the environment variable that holds
	// the namespace Open Data Hub (ODH) is installed in.
	odhNamespaceEnvVar = "ODH_NAMESPACE"
)

func GetOpenDataHubNamespace() string {
return lookupEnvOrDefault(odhNamespaceEnvVar, "opendatahub")
}

// lookupEnvOrDefault reads the environment variable named by key and returns
// its value, or the provided default value when the variable is not present
// in the environment.
func lookupEnvOrDefault(key, value string) string {
	v, ok := os.LookupEnv(key)
	if !ok {
		return value
	}
	return v
}
230 changes: 230 additions & 0 deletions tests/integration/ray_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
/*
Copyright 2023.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package integration

import (
"encoding/base64"
"testing"

. "github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

support "github.com/project-codeflare/codeflare-operator/test/support"
rayv1alpha1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1alpha1"
)

// TestRayCluster exercises the CodeFlare/KubeRay stack end to end: it creates
// a RayCluster in a fresh test namespace, waits for the cluster to become
// ready, submits a RayJob that runs MNIST training on that cluster, and
// asserts the job terminates with a succeeded status. Job logs are written to
// the test output directory regardless of outcome.
func TestRayCluster(t *testing.T) {
	test := support.With(t)
	test.T().Parallel()

	// Create a dedicated namespace for this test run.
	namespace := test.NewTestNamespace()

	// MNIST training script, mounted into the Ray head pod through a ConfigMap.
	mnist := &corev1.ConfigMap{
		TypeMeta: metav1.TypeMeta{
			APIVersion: corev1.SchemeGroupVersion.String(),
			Kind:       "ConfigMap",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:      "mnist",
			Namespace: namespace.Name,
		},
		BinaryData: map[string][]byte{
			"mnist.py": ReadFile(test, "resources/mnist.py"),
		},
		Immutable: support.Ptr(true),
	}
	mnist, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Create(test.Ctx(), mnist, metav1.CreateOptions{})
	test.Expect(err).NotTo(HaveOccurred())
	test.T().Logf("Created ConfigMap %s/%s successfully", mnist.Namespace, mnist.Name)

	// RayCluster, CR taken from https://github.com/project-codeflare/codeflare-operator/blob/main/test/e2e/mnist_rayjob_mcad_raycluster_test.go
	rayCluster := &rayv1alpha1.RayCluster{
		TypeMeta: metav1.TypeMeta{
			APIVersion: rayv1alpha1.GroupVersion.String(),
			Kind:       "RayCluster",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:      "raycluster",
			Namespace: namespace.Name,
		},
		Spec: rayv1alpha1.RayClusterSpec{
			RayVersion: support.GetRayVersion(),
			HeadGroupSpec: rayv1alpha1.HeadGroupSpec{
				RayStartParams: map[string]string{
					"dashboard-host": "0.0.0.0",
				},
				Template: corev1.PodTemplateSpec{
					Spec: corev1.PodSpec{
						Containers: []corev1.Container{
							{
								Name:  "ray-head",
								Image: support.GetRayImage(),
								Ports: []corev1.ContainerPort{
									{
										ContainerPort: 6379,
										Name:          "gcs",
									},
									{
										ContainerPort: 8265,
										Name:          "dashboard",
									},
									{
										ContainerPort: 10001,
										Name:          "client",
									},
								},
								// Stop Ray gracefully before the container is terminated.
								Lifecycle: &corev1.Lifecycle{
									PreStop: &corev1.LifecycleHandler{
										Exec: &corev1.ExecAction{
											Command: []string{"/bin/sh", "-c", "ray stop"},
										},
									},
								},
								Resources: corev1.ResourceRequirements{
									Requests: corev1.ResourceList{
										corev1.ResourceCPU:    resource.MustParse("250m"),
										corev1.ResourceMemory: resource.MustParse("512Mi"),
									},
									Limits: corev1.ResourceList{
										corev1.ResourceCPU:    resource.MustParse("1"),
										corev1.ResourceMemory: resource.MustParse("1G"),
									},
								},
								VolumeMounts: []corev1.VolumeMount{
									{
										Name:      "mnist",
										MountPath: "/home/ray/jobs",
									},
								},
							},
						},
						Volumes: []corev1.Volume{
							{
								Name: "mnist",
								VolumeSource: corev1.VolumeSource{
									ConfigMap: &corev1.ConfigMapVolumeSource{
										LocalObjectReference: corev1.LocalObjectReference{
											Name: mnist.Name,
										},
									},
								},
							},
						},
					},
				},
			},
			WorkerGroupSpecs: []rayv1alpha1.WorkerGroupSpec{
				{
					Replicas:       support.Ptr(int32(1)),
					MinReplicas:    support.Ptr(int32(1)),
					MaxReplicas:    support.Ptr(int32(2)),
					GroupName:      "small-group",
					RayStartParams: map[string]string{},
					Template: corev1.PodTemplateSpec{
						Spec: corev1.PodSpec{
							// Block worker startup until the head service DNS record resolves.
							InitContainers: []corev1.Container{
								{
									Name:    "init-myservice",
									Image:   "busybox:1.28",
									Command: []string{"sh", "-c", "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"},
								},
							},
							Containers: []corev1.Container{
								{
									Name:  "ray-worker",
									Image: support.GetRayImage(),
									Lifecycle: &corev1.Lifecycle{
										PreStop: &corev1.LifecycleHandler{
											Exec: &corev1.ExecAction{
												Command: []string{"/bin/sh", "-c", "ray stop"},
											},
										},
									},
									Resources: corev1.ResourceRequirements{
										Requests: corev1.ResourceList{
											corev1.ResourceCPU:    resource.MustParse("250m"),
											corev1.ResourceMemory: resource.MustParse("256Mi"),
										},
										Limits: corev1.ResourceList{
											corev1.ResourceCPU:    resource.MustParse("1"),
											corev1.ResourceMemory: resource.MustParse("512Mi"),
										},
									},
								},
							},
						},
					},
				},
			},
		},
	}

	rayCluster, err = test.Client().Ray().RayV1alpha1().RayClusters(namespace.Name).Create(test.Ctx(), rayCluster, metav1.CreateOptions{})
	test.Expect(err).NotTo(HaveOccurred())
	test.T().Logf("Created RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name)

	// Fix: the original message said "to complete", but a RayCluster does not
	// complete — the assertion below waits until it reports the Ready state.
	test.T().Logf("Waiting for RayCluster %s/%s to become ready", rayCluster.Namespace, rayCluster.Name)
	test.Eventually(support.RayCluster(test, rayCluster.Namespace, rayCluster.Name), support.TestTimeoutLong).
		Should(WithTransform(support.RayClusterState, Equal(rayv1alpha1.Ready)))

	// RayJob that runs the mounted MNIST script on the cluster created above,
	// targeted via ClusterSelector. RuntimeEnv is a base64-encoded JSON runtime
	// environment installing the training dependencies with pip.
	rayJob := &rayv1alpha1.RayJob{
		TypeMeta: metav1.TypeMeta{
			APIVersion: rayv1alpha1.GroupVersion.String(),
			Kind:       "RayJob",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:      "mnist",
			Namespace: namespace.Name,
		},
		Spec: rayv1alpha1.RayJobSpec{
			Entrypoint: "python /home/ray/jobs/mnist.py",
			RuntimeEnv: base64.StdEncoding.EncodeToString([]byte(`
{
  "pip": [
    "pytorch_lightning==1.5.10",
    "torchmetrics==0.9.1",
    "torchvision==0.12.0"
  ],
  "env_vars": {
  }
}
`)),
			ClusterSelector: map[string]string{
				support.RayJobDefaultClusterSelectorKey: rayCluster.Name,
			},
			// Keep the cluster alive so its logs can be collected after the job.
			ShutdownAfterJobFinishes: false,
		},
	}
	rayJob, err = test.Client().Ray().RayV1alpha1().RayJobs(namespace.Name).Create(test.Ctx(), rayJob, metav1.CreateOptions{})
	test.Expect(err).NotTo(HaveOccurred())
	test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name)

	// Retrieving the job logs once it has completed or timed out
	defer support.WriteRayJobLogs(test, rayJob.Namespace, rayJob.Name)

	test.T().Logf("Waiting for RayJob %s/%s to complete", rayJob.Namespace, rayJob.Name)
	test.Eventually(support.RayJob(test, rayJob.Namespace, rayJob.Name), support.TestTimeoutLong).
		Should(WithTransform(support.RayJobStatus, Satisfy(rayv1alpha1.IsJobTerminal)))

	// Assert the Ray job has completed successfully
	test.Expect(support.GetRayJob(test, rayJob.Namespace, rayJob.Name)).
		To(WithTransform(support.RayJobStatus, Equal(rayv1alpha1.JobStatusSucceeded)))
}
Loading

0 comments on commit a8e4423

Please sign in to comment.