-
Notifications
You must be signed in to change notification settings - Fork 43
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Sanity integration tests and Ray cluster test
Signed-off-by: Karel Suta <[email protected]>
- Loading branch information
1 parent
05695aa
commit a8e4423
Showing
8 changed files
with
1,088 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
module github.com/opendatahub-io/distributed-workloads/tests/new-tests | ||
|
||
require ( | ||
github.com/onsi/gomega v1.27.6 | ||
github.com/project-codeflare/codeflare-operator v0.0.7-0.20230724151606-c885e0aeaba8 | ||
github.com/ray-project/kuberay/ray-operator v0.0.0-20230614221720-085c29d40fa9 | ||
k8s.io/api v0.26.3 | ||
k8s.io/apimachinery v0.26.3 | ||
) | ||
|
||
require ( | ||
github.com/davecgh/go-spew v1.1.1 // indirect | ||
github.com/emicklei/go-restful/v3 v3.9.0 // indirect | ||
github.com/go-logr/logr v1.2.4 // indirect | ||
github.com/go-openapi/jsonpointer v0.19.5 // indirect | ||
github.com/go-openapi/jsonreference v0.20.0 // indirect | ||
github.com/go-openapi/swag v0.19.14 // indirect | ||
github.com/gogo/protobuf v1.3.2 // indirect | ||
github.com/golang/protobuf v1.5.3 // indirect | ||
github.com/google/gnostic v0.5.7-v3refs // indirect | ||
github.com/google/go-cmp v0.5.9 // indirect | ||
github.com/google/gofuzz v1.1.0 // indirect | ||
github.com/imdario/mergo v0.3.12 // indirect | ||
github.com/josharian/intern v1.0.0 // indirect | ||
github.com/json-iterator/go v1.1.12 // indirect | ||
github.com/mailru/easyjson v0.7.6 // indirect | ||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect | ||
github.com/modern-go/reflect2 v1.0.2 // indirect | ||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect | ||
github.com/project-codeflare/multi-cluster-app-dispatcher v1.32.0 // indirect | ||
github.com/spf13/pflag v1.0.5 // indirect | ||
golang.org/x/net v0.8.0 // indirect | ||
golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b // indirect | ||
golang.org/x/sys v0.6.0 // indirect | ||
golang.org/x/term v0.6.0 // indirect | ||
golang.org/x/text v0.8.0 // indirect | ||
golang.org/x/time v0.3.0 // indirect | ||
google.golang.org/appengine v1.6.7 // indirect | ||
google.golang.org/protobuf v1.28.1 // indirect | ||
gopkg.in/inf.v0 v0.9.1 // indirect | ||
gopkg.in/yaml.v2 v2.4.0 // indirect | ||
gopkg.in/yaml.v3 v3.0.1 // indirect | ||
k8s.io/apiextensions-apiserver v0.26.1 // indirect | ||
k8s.io/client-go v0.26.3 // indirect | ||
k8s.io/klog/v2 v2.80.1 // indirect | ||
k8s.io/kube-openapi v0.0.0-20221012153701-172d655c2280 // indirect | ||
k8s.io/utils v0.0.0-20221128185143-99ec85e7a448 // indirect | ||
sigs.k8s.io/controller-runtime v0.14.6 // indirect | ||
sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect | ||
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect | ||
sigs.k8s.io/yaml v1.3.0 // indirect | ||
) | ||
|
||
go 1.20 |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
/* | ||
Copyright 2023. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package integration | ||
|
||
import ( | ||
"os" | ||
) | ||
|
||
const ( | ||
// odhNamespaceEnvVar is the name of the environment variable that holds the
// namespace Open Data Hub (ODH) is installed in.
odhNamespaceEnvVar = "ODH_NAMESPACE" | ||
) | ||
|
||
func GetOpenDataHubNamespace() string { | ||
return lookupEnvOrDefault(odhNamespaceEnvVar, "opendatahub") | ||
} | ||
|
||
// lookupEnvOrDefault returns the value of the environment variable named key
// when that variable is present in the environment (an empty value still counts
// as present). If the variable is not set at all, value is returned instead.
func lookupEnvOrDefault(key, value string) string {
	fromEnv, found := os.LookupEnv(key)
	if !found {
		return value
	}
	return fromEnv
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,230 @@ | ||
/* | ||
Copyright 2023. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package integration | ||
|
||
import ( | ||
"encoding/base64" | ||
"testing" | ||
|
||
. "github.com/onsi/gomega" | ||
corev1 "k8s.io/api/core/v1" | ||
"k8s.io/apimachinery/pkg/api/resource" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
|
||
support "github.com/project-codeflare/codeflare-operator/test/support" | ||
rayv1alpha1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1alpha1" | ||
) | ||
|
||
// TestRayCluster is an integration test that exercises a RayCluster end to end.
//
// The steps are, in order:
//  1. create a test namespace;
//  2. store the MNIST training script in an immutable ConfigMap;
//  3. create a RayCluster (one head, one worker group) whose head mounts that ConfigMap;
//  4. wait until the RayCluster reports the Ready state;
//  5. create a RayJob that runs the mounted script on that cluster;
//  6. wait for the RayJob to reach a terminal status and assert that it succeeded.
func TestRayCluster(t *testing.T) { | ||
// Wrap the testing.T in the codeflare-operator test support helper; everything
// below (client, context, assertions) is reached through it.
test := support.With(t) | ||
test.T().Parallel() | ||
|
||
// Create the namespace that all resources of this test live in.
// NOTE(review): cleanup of the namespace is presumably handled by the support package - confirm.
namespace := test.NewTestNamespace() | ||
|
||
// ConfigMap carrying the MNIST training script. The script is read from
// resources/mnist.py and stored as binary data under the key "mnist.py";
// the ConfigMap is marked immutable.
mnist := &corev1.ConfigMap{ | ||
TypeMeta: metav1.TypeMeta{ | ||
APIVersion: corev1.SchemeGroupVersion.String(), | ||
Kind: "ConfigMap", | ||
}, | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: "mnist", | ||
Namespace: namespace.Name, | ||
}, | ||
BinaryData: map[string][]byte{ | ||
"mnist.py": ReadFile(test, "resources/mnist.py"), | ||
}, | ||
Immutable: support.Ptr(true), | ||
} | ||
mnist, err := test.Client().Core().CoreV1().ConfigMaps(namespace.Name).Create(test.Ctx(), mnist, metav1.CreateOptions{}) | ||
test.Expect(err).NotTo(HaveOccurred()) | ||
test.T().Logf("Created ConfigMap %s/%s successfully", mnist.Namespace, mnist.Name) | ||
|
||
// RayCluster definition. The CR is taken from
// https://github.com/project-codeflare/codeflare-operator/blob/main/test/e2e/mnist_rayjob_mcad_raycluster_test.go
rayCluster := &rayv1alpha1.RayCluster{ | ||
TypeMeta: metav1.TypeMeta{ | ||
APIVersion: rayv1alpha1.GroupVersion.String(), | ||
Kind: "RayCluster", | ||
}, | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: "raycluster", | ||
Namespace: namespace.Name, | ||
}, | ||
Spec: rayv1alpha1.RayClusterSpec{ | ||
RayVersion: support.GetRayVersion(), | ||
// Head group: a single "ray-head" container exposing the gcs, dashboard
// and client ports, with the MNIST ConfigMap mounted at /home/ray/jobs.
HeadGroupSpec: rayv1alpha1.HeadGroupSpec{ | ||
RayStartParams: map[string]string{ | ||
"dashboard-host": "0.0.0.0", | ||
}, | ||
Template: corev1.PodTemplateSpec{ | ||
Spec: corev1.PodSpec{ | ||
Containers: []corev1.Container{ | ||
{ | ||
Name: "ray-head", | ||
Image: support.GetRayImage(), | ||
Ports: []corev1.ContainerPort{ | ||
{ | ||
ContainerPort: 6379, | ||
Name: "gcs", | ||
}, | ||
{ | ||
ContainerPort: 8265, | ||
Name: "dashboard", | ||
}, | ||
{ | ||
ContainerPort: 10001, | ||
Name: "client", | ||
}, | ||
}, | ||
// Run "ray stop" before the container is terminated.
Lifecycle: &corev1.Lifecycle{ | ||
PreStop: &corev1.LifecycleHandler{ | ||
Exec: &corev1.ExecAction{ | ||
Command: []string{"/bin/sh", "-c", "ray stop"}, | ||
}, | ||
}, | ||
}, | ||
Resources: corev1.ResourceRequirements{ | ||
Requests: corev1.ResourceList{ | ||
corev1.ResourceCPU: resource.MustParse("250m"), | ||
corev1.ResourceMemory: resource.MustParse("512Mi"), | ||
}, | ||
Limits: corev1.ResourceList{ | ||
corev1.ResourceCPU: resource.MustParse("1"), | ||
corev1.ResourceMemory: resource.MustParse("1G"), | ||
}, | ||
}, | ||
// The mount path must match the directory used by the RayJob entrypoint below.
VolumeMounts: []corev1.VolumeMount{ | ||
{ | ||
Name: "mnist", | ||
MountPath: "/home/ray/jobs", | ||
}, | ||
}, | ||
}, | ||
}, | ||
// Volume backed by the ConfigMap created above.
Volumes: []corev1.Volume{ | ||
{ | ||
Name: "mnist", | ||
VolumeSource: corev1.VolumeSource{ | ||
ConfigMap: &corev1.ConfigMapVolumeSource{ | ||
LocalObjectReference: corev1.LocalObjectReference{ | ||
Name: mnist.Name, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
// Single worker group "small-group": 1 replica, allowed range 1 to 2.
WorkerGroupSpecs: []rayv1alpha1.WorkerGroupSpec{ | ||
{ | ||
Replicas: support.Ptr(int32(1)), | ||
MinReplicas: support.Ptr(int32(1)), | ||
MaxReplicas: support.Ptr(int32(2)), | ||
GroupName: "small-group", | ||
RayStartParams: map[string]string{}, | ||
Template: corev1.PodTemplateSpec{ | ||
Spec: corev1.PodSpec{ | ||
// Init container that loops until the DNS name built from $RAY_IP and
// the pod's namespace resolves, retrying every 2 seconds.
// NOTE(review): $RAY_IP is presumably injected by the KubeRay operator - confirm.
InitContainers: []corev1.Container{ | ||
{ | ||
Name: "init-myservice", | ||
Image: "busybox:1.28", | ||
Command: []string{"sh", "-c", "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"}, | ||
}, | ||
}, | ||
Containers: []corev1.Container{ | ||
{ | ||
Name: "ray-worker", | ||
Image: support.GetRayImage(), | ||
// Run "ray stop" before the container is terminated.
Lifecycle: &corev1.Lifecycle{ | ||
PreStop: &corev1.LifecycleHandler{ | ||
Exec: &corev1.ExecAction{ | ||
Command: []string{"/bin/sh", "-c", "ray stop"}, | ||
}, | ||
}, | ||
}, | ||
Resources: corev1.ResourceRequirements{ | ||
Requests: corev1.ResourceList{ | ||
corev1.ResourceCPU: resource.MustParse("250m"), | ||
corev1.ResourceMemory: resource.MustParse("256Mi"), | ||
}, | ||
Limits: corev1.ResourceList{ | ||
corev1.ResourceCPU: resource.MustParse("1"), | ||
corev1.ResourceMemory: resource.MustParse("512Mi"), | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
} | ||
|
||
// Submit the RayCluster; err is reused from the ConfigMap creation above.
rayCluster, err = test.Client().Ray().RayV1alpha1().RayClusters(namespace.Name).Create(test.Ctx(), rayCluster, metav1.CreateOptions{}) | ||
test.Expect(err).NotTo(HaveOccurred()) | ||
test.T().Logf("Created RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) | ||
|
||
// Block until the RayCluster state equals Ready, using support.TestTimeoutLong.
// NOTE(review): the log message says "to complete", but the condition waited on is the Ready state.
test.T().Logf("Waiting for RayCluster %s/%s to complete", rayCluster.Namespace, rayCluster.Name) | ||
test.Eventually(support.RayCluster(test, rayCluster.Namespace, rayCluster.Name), support.TestTimeoutLong). | ||
Should(WithTransform(support.RayClusterState, Equal(rayv1alpha1.Ready))) | ||
|
||
// RayJob that runs the mounted MNIST script. The runtime environment (pip
// packages and an empty env_vars map) is passed as base64-encoded JSON, and the
// cluster selector points the job at the RayCluster created above. The cluster
// is not shut down when the job finishes.
rayJob := &rayv1alpha1.RayJob{ | ||
TypeMeta: metav1.TypeMeta{ | ||
APIVersion: rayv1alpha1.GroupVersion.String(), | ||
Kind: "RayJob", | ||
}, | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: "mnist", | ||
Namespace: namespace.Name, | ||
}, | ||
Spec: rayv1alpha1.RayJobSpec{ | ||
Entrypoint: "python /home/ray/jobs/mnist.py", | ||
RuntimeEnv: base64.StdEncoding.EncodeToString([]byte(` | ||
{ | ||
"pip": [ | ||
"pytorch_lightning==1.5.10", | ||
"torchmetrics==0.9.1", | ||
"torchvision==0.12.0" | ||
], | ||
"env_vars": { | ||
} | ||
} | ||
`)), | ||
ClusterSelector: map[string]string{ | ||
support.RayJobDefaultClusterSelectorKey: rayCluster.Name, | ||
}, | ||
ShutdownAfterJobFinishes: false, | ||
}, | ||
} | ||
rayJob, err = test.Client().Ray().RayV1alpha1().RayJobs(namespace.Name).Create(test.Ctx(), rayJob, metav1.CreateOptions{}) | ||
test.Expect(err).NotTo(HaveOccurred()) | ||
test.T().Logf("Created RayJob %s/%s successfully", rayJob.Namespace, rayJob.Name) | ||
|
||
// Retrieve the job logs once the job has completed or timed out. The call is
// deferred so it also runs when one of the assertions below fails the test.
defer support.WriteRayJobLogs(test, rayJob.Namespace, rayJob.Name) | ||
|
||
// Block until the RayJob status is terminal, using support.TestTimeoutLong.
test.T().Logf("Waiting for RayJob %s/%s to complete", rayJob.Namespace, rayJob.Name) | ||
test.Eventually(support.RayJob(test, rayJob.Namespace, rayJob.Name), support.TestTimeoutLong). | ||
Should(WithTransform(support.RayJobStatus, Satisfy(rayv1alpha1.IsJobTerminal))) | ||
|
||
// A terminal status is not necessarily a success, so assert that the Ray job
// has completed successfully.
test.Expect(support.GetRayJob(test, rayJob.Namespace, rayJob.Name)). | ||
To(WithTransform(support.RayJobStatus, Equal(rayv1alpha1.JobStatusSucceeded))) | ||
} |
Oops, something went wrong.