Skip to content

Commit

Permalink
Generate AppWrapper name to provide unique workloads
Browse files (browse the repository at this point in the history)
  • Loading branch information
sutaakar committed Jul 15, 2024
1 parent 03d5bee commit d2c1db3
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 22 deletions.
34 changes: 21 additions & 13 deletions test/e2e/mnist_pytorch_appwrapper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,20 @@ func runMnistPyTorchAppWrapper(t *testing.T, accelerator string) {
Kind: "Job",
},
ObjectMeta: metav1.ObjectMeta{
Name: "mnist",
Namespace: namespace.Name,
GenerateName: "mnist",
Namespace: namespace.Name,
},
Spec: batchv1.JobSpec{
Completions: Ptr(int32(1)),
Parallelism: Ptr(int32(1)),
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Tolerations: []corev1.Toleration{
{
Key: "nvidia.com/gpu",
Operator: corev1.TolerationOpExists,
},
},
Containers: []corev1.Container{
{
Name: "job",
Expand Down Expand Up @@ -139,9 +145,9 @@ func runMnistPyTorchAppWrapper(t *testing.T, accelerator string) {
Kind: "AppWrapper",
},
ObjectMeta: metav1.ObjectMeta{
Name: "mnist",
Namespace: namespace.Name,
Labels: map[string]string{"kueue.x-k8s.io/queue-name": localQueue.Name},
GenerateName: "mnist",
Namespace: namespace.Name,
Labels: map[string]string{"kueue.x-k8s.io/queue-name": localQueue.Name},
},
Spec: mcadv1beta2.AppWrapperSpec{
Components: []mcadv1beta2.AppWrapperComponent{
Expand All @@ -158,16 +164,18 @@ func runMnistPyTorchAppWrapper(t *testing.T, accelerator string) {
unstruct := unstructured.Unstructured{Object: awMap}
_, err = test.Client().Dynamic().Resource(appWrapperResource).Namespace(namespace.Name).Create(test.Ctx(), &unstruct, metav1.CreateOptions{})
test.Expect(err).NotTo(HaveOccurred())
test.T().Logf("Created AppWrapper %s/%s successfully", aw.Namespace, aw.Name)
test.T().Logf("Created AppWrapper %s/%s successfully", aw.Namespace, aw.GenerateName)

test.T().Logf("Waiting for AppWrapper %s/%s to be running", aw.Namespace, aw.Name)
test.Eventually(AppWrapper(test, namespace, aw.Name), TestTimeoutMedium).
Should(WithTransform(AppWrapperPhase, Equal(mcadv1beta2.AppWrapperRunning)))
test.T().Logf("Waiting for AppWrapper %s/%s to be running", aw.Namespace, aw.GenerateName)
test.Eventually(AppWrappers(test, namespace), TestTimeoutMedium).
Should(ContainElement(WithTransform(AppWrapperPhase, Equal(mcadv1beta2.AppWrapperRunning))))

test.T().Logf("Waiting for AppWrapper %s/%s to complete", job.Namespace, job.Name)
test.Eventually(AppWrapper(test, namespace, aw.Name), TestTimeoutLong).Should(
Or(
WithTransform(AppWrapperPhase, Equal(mcadv1beta2.AppWrapperSucceeded)),
WithTransform(AppWrapperPhase, Equal(mcadv1beta2.AppWrapperFailed)),
test.Eventually(AppWrappers(test, namespace), TestTimeoutLong).Should(
ContainElement(
Or(
WithTransform(AppWrapperPhase, Equal(mcadv1beta2.AppWrapperSucceeded)),
WithTransform(AppWrapperPhase, Equal(mcadv1beta2.AppWrapperFailed)),
),
))
}
24 changes: 15 additions & 9 deletions test/e2e/mnist_rayjob_raycluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,9 @@ func runMnistRayJobRayClusterAppWrapper(t *testing.T, accelerator string, number
Kind: "AppWrapper",
},
ObjectMeta: metav1.ObjectMeta{
Name: rayCluster.Name,
Namespace: namespace.Name,
Labels: map[string]string{"kueue.x-k8s.io/queue-name": localQueue.Name},
GenerateName: rayCluster.GenerateName,
Namespace: namespace.Name,
Labels: map[string]string{"kueue.x-k8s.io/queue-name": localQueue.Name},
},
Spec: mcadv1beta2.AppWrapperSpec{
Components: []mcadv1beta2.AppWrapperComponent{
Expand All @@ -145,11 +145,11 @@ func runMnistRayJobRayClusterAppWrapper(t *testing.T, accelerator string, number
unstruct := unstructured.Unstructured{Object: awMap}
_, err = test.Client().Dynamic().Resource(appWrapperResource).Namespace(namespace.Name).Create(test.Ctx(), &unstruct, metav1.CreateOptions{})
test.Expect(err).NotTo(HaveOccurred())
test.T().Logf("Created AppWrapper %s/%s successfully", aw.Namespace, aw.Name)
test.T().Logf("Created AppWrapper %s/%s successfully", aw.Namespace, aw.GenerateName)

test.T().Logf("Waiting for AppWrapper %s/%s to be running", aw.Namespace, aw.Name)
test.Eventually(AppWrapper(test, namespace, aw.Name), TestTimeoutMedium).
Should(WithTransform(AppWrapperPhase, Equal(mcadv1beta2.AppWrapperRunning)))
test.T().Logf("Waiting for AppWrapper %s/%s to be running", aw.Namespace, aw.GenerateName)
test.Eventually(AppWrappers(test, namespace), TestTimeoutMedium).
Should(ContainElement(WithTransform(AppWrapperPhase, Equal(mcadv1beta2.AppWrapperRunning))))

test.T().Logf("Waiting for RayCluster %s/%s to be running", rayCluster.Namespace, rayCluster.Name)
test.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium).
Expand Down Expand Up @@ -206,8 +206,8 @@ func constructRayCluster(_ Test, namespace *corev1.Namespace, mnist *corev1.Conf
Kind: "RayCluster",
},
ObjectMeta: metav1.ObjectMeta{
Name: "raycluster",
Namespace: namespace.Name,
GenerateName: "raycluster",
Namespace: namespace.Name,
},
Spec: rayv1.RayClusterSpec{
RayVersion: GetRayVersion(),
Expand Down Expand Up @@ -266,6 +266,12 @@ func constructRayCluster(_ Test, namespace *corev1.Namespace, mnist *corev1.Conf
RayStartParams: map[string]string{},
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Tolerations: []corev1.Toleration{
{
Key: "nvidia.com/gpu",
Operator: corev1.TolerationOpExists,
},
},
Containers: []corev1.Container{
{
Name: "ray-worker",
Expand Down

0 comments on commit d2c1db3

Please sign in to comment.