From 51c44a630bb8a5a15ee14f7ced75f98f378bad60 Mon Sep 17 00:00:00 2001 From: Rueian Date: Wed, 25 Dec 2024 10:58:40 +0800 Subject: [PATCH 1/3] [RayCluster][CI] add e2e tests for the RayClusterSuspended condition Signed-off-by: Rueian --- .../test/sampleyaml/raycluster_test.go | 18 ++++++++++++++++++ ray-operator/test/support/yaml.go | 12 ++++++++++++ 2 files changed, 30 insertions(+) diff --git a/ray-operator/test/sampleyaml/raycluster_test.go b/ray-operator/test/sampleyaml/raycluster_test.go index 85dfeb51a2..1596ed3926 100644 --- a/ray-operator/test/sampleyaml/raycluster_test.go +++ b/ray-operator/test/sampleyaml/raycluster_test.go @@ -106,6 +106,24 @@ func TestRayCluster(t *testing.T) { // Check that all pods can submit jobs g.Eventually(SubmitJobsToAllPods(test, rayCluster), TestTimeoutShort).Should(Succeed()) + // Suspend RayCluster + KubectlSetRayClusterSuspend(test, namespace.Name, rayCluster.Name, true) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionFalse, rayv1.HeadPodNotFound))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionFalse, rayv1.RayClusterPodsProvisioning))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.RayClusterSuspended), MatchCondition(metav1.ConditionTrue, string(rayv1.RayClusterSuspended)))) + + // Resume RayCluster + KubectlSetRayClusterSuspend(test, namespace.Name, rayCluster.Name, false) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionTrue, rayv1.HeadPodRunningAndReady))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). 
+ Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionTrue, rayv1.AllPodRunningAndReadyFirstTime))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.RayClusterSuspended), MatchCondition(metav1.ConditionFalse, string(rayv1.RayClusterSuspended)))) + // Delete all pods after setting quota to 0 to avoid recreating pods KubectlApplyQuota(test, namespace.Name, "--hard=cpu=0,memory=0G,pods=0") KubectlDeleteAllPods(test, namespace.Name) diff --git a/ray-operator/test/support/yaml.go b/ray-operator/test/support/yaml.go index 289b96bc50..6d6e783230 100644 --- a/ray-operator/test/support/yaml.go +++ b/ray-operator/test/support/yaml.go @@ -71,3 +71,15 @@ func KubectlDeleteAllPods(t Test, namespace string) { require.NoError(t.T(), err, "Failed to delete pods in %s", namespace) t.T().Logf("Successfully delete pods in %s", namespace) } + +func KubectlSetRayClusterSuspend(t Test, namespace, raycluster string, suspend bool) { + t.T().Helper() + patch := `{"spec":{"suspend":false}}` + if suspend { + patch = `{"spec":{"suspend":true}}` + } + kubectlCmd := exec.CommandContext(t.Ctx(), "kubectl", "patch", "raycluster", raycluster, "-n", namespace, "--type=merge", "-p", patch) + err := kubectlCmd.Run() + require.NoError(t.T(), err, "Failed to set suspend=%t to %s in %s", suspend, raycluster, namespace) + t.T().Logf("Successfully set suspend=%t to %s in %s", suspend, raycluster, namespace) +} From 4fa94098dd48f12082558883989282c642b80883 Mon Sep 17 00:00:00 2001 From: Rueian Date: Thu, 26 Dec 2024 00:19:55 +0800 Subject: [PATCH 2/3] [RayCluster][CI] parallelize e2e sampleyaml tests Signed-off-by: Rueian --- .../test/sampleyaml/raycluster_test.go | 66 ++++++++++++++----- 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/ray-operator/test/sampleyaml/raycluster_test.go b/ray-operator/test/sampleyaml/raycluster_test.go index 1596ed3926..89aadc1fde 
100644 --- a/ray-operator/test/sampleyaml/raycluster_test.go +++ b/ray-operator/test/sampleyaml/raycluster_test.go @@ -106,36 +106,70 @@ func TestRayCluster(t *testing.T) { // Check that all pods can submit jobs g.Eventually(SubmitJobsToAllPods(test, rayCluster), TestTimeoutShort).Should(Succeed()) - // Suspend RayCluster - KubectlSetRayClusterSuspend(test, namespace.Name, rayCluster.Name, true) + // Delete all pods after setting quota to 0 to avoid recreating pods + KubectlApplyQuota(test, namespace.Name, "--hard=cpu=0,memory=0G,pods=0") + KubectlDeleteAllPods(test, namespace.Name) + // The HeadPodReady condition should now be False with a HeadPodNotFound reason. g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionFalse, rayv1.HeadPodNotFound))) + // The RayClusterProvisioned condition should still be True. g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). - Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionFalse, rayv1.RayClusterPodsProvisioning))) + Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionTrue, rayv1.AllPodRunningAndReadyFirstTime))) + // The RayClusterReplicaFailure condition should now be True with a FailedCreateHeadPod reason due to the quota limit. g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). 
- Should(WithTransform(StatusCondition(rayv1.RayClusterSuspended), MatchCondition(metav1.ConditionTrue, string(rayv1.RayClusterSuspended)))) + Should(WithTransform(StatusCondition(rayv1.RayClusterReplicaFailure), MatchCondition(metav1.ConditionTrue, "FailedCreateHeadPod"))) + }) + } +} - // Resume RayCluster - KubectlSetRayClusterSuspend(test, namespace.Name, rayCluster.Name, false) +func TestRayClusterSuspend(t *testing.T) { + tests := []struct { + name string + }{ + { + name: "ray-cluster.complete.yaml", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + test := With(t) + g := NewWithT(t) + + yamlFilePath := path.Join(GetSampleYAMLDir(test), tt.name) + namespace := test.NewTestNamespace() + rayClusterFromYaml := DeserializeRayClusterYAML(test, yamlFilePath) + KubectlApplyYAML(test, yamlFilePath, namespace.Name) + + rayCluster, err := GetRayCluster(test, namespace.Name, rayClusterFromYaml.Name) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(rayCluster).NotTo(BeNil()) + + test.T().Logf("Waiting for RayCluster %s/%s to be ready", namespace.Name, rayCluster.Name) g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionTrue, rayv1.HeadPodRunningAndReady))) g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionTrue, rayv1.AllPodRunningAndReadyFirstTime))) - g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). 
- Should(WithTransform(StatusCondition(rayv1.RayClusterSuspended), MatchCondition(metav1.ConditionFalse, string(rayv1.RayClusterSuspended)))) + rayCluster, err = GetRayCluster(test, namespace.Name, rayCluster.Name) + g.Expect(err).NotTo(HaveOccurred()) - // Delete all pods after setting quota to 0 to avoid recreating pods - KubectlApplyQuota(test, namespace.Name, "--hard=cpu=0,memory=0G,pods=0") - KubectlDeleteAllPods(test, namespace.Name) - // The HeadPodReady condition should now be False with a HeadPodNotFound reason. + // Suspend RayCluster + KubectlSetRayClusterSuspend(test, namespace.Name, rayCluster.Name, true) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.RayClusterSuspended), MatchCondition(metav1.ConditionTrue, string(rayv1.RayClusterSuspended)))) g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionFalse, rayv1.HeadPodNotFound))) - // The RayClusterProvisioned condition should still be True. g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). - Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionTrue, rayv1.AllPodRunningAndReadyFirstTime))) - // The RayClusterReplicaFailure condition now be True with a FailedCreateHeadPod reason due to the quota limit. + Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionFalse, rayv1.RayClusterPodsProvisioning))) + + // Resume RayCluster + KubectlSetRayClusterSuspend(test, namespace.Name, rayCluster.Name, false) g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). 
- Should(WithTransform(StatusCondition(rayv1.RayClusterReplicaFailure), MatchCondition(metav1.ConditionTrue, "FailedCreateHeadPod"))) + Should(WithTransform(StatusCondition(rayv1.RayClusterSuspended), MatchCondition(metav1.ConditionFalse, string(rayv1.RayClusterSuspended)))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionTrue, rayv1.HeadPodRunningAndReady))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionTrue, rayv1.AllPodRunningAndReadyFirstTime))) }) } } From 34484bd8c39ed58f896b764af98100303f5ef7cd Mon Sep 17 00:00:00 2001 From: Rueian Date: Sat, 28 Dec 2024 11:07:35 +0800 Subject: [PATCH 3/3] [RayCluster][CI] move the e2e test for the RayClusterSuspended to e2e folder Signed-off-by: Rueian --- ray-operator/test/e2e/raycluster_test.go | 46 ++++++++++++++++ .../test/sampleyaml/raycluster_test.go | 52 ------------------- ray-operator/test/support/yaml.go | 12 ----- 3 files changed, 46 insertions(+), 64 deletions(-) diff --git a/ray-operator/test/e2e/raycluster_test.go b/ray-operator/test/e2e/raycluster_test.go index fd6fd3eb26..565f635525 100644 --- a/ray-operator/test/e2e/raycluster_test.go +++ b/ray-operator/test/e2e/raycluster_test.go @@ -6,6 +6,7 @@ import ( . 
"github.com/onsi/gomega" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" @@ -75,3 +76,48 @@ func TestRayClusterManagedBy(t *testing.T) { g.Expect(errors.IsInvalid(err)).To(BeTrue(), "error: %v", err) }) } + +func TestRayClusterSuspend(t *testing.T) { + test := With(t) + g := NewWithT(t) + // Create a namespace + namespace := test.NewTestNamespace() + + rayClusterAC := rayv1ac.RayCluster("raycluster-suspend", namespace.Name).WithSpec(newRayClusterSpec()) + + rayCluster, err := test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Created RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) + + test.T().Logf("Waiting for RayCluster %s/%s to become ready", rayCluster.Namespace, rayCluster.Name) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionTrue, rayv1.HeadPodRunningAndReady))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionTrue, rayv1.AllPodRunningAndReadyFirstTime))) + + rayClusterAC = rayClusterAC.WithSpec(rayClusterAC.Spec.WithSuspend(true)) + rayCluster, err = test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Suspend RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) + + test.T().Logf("Waiting for RayCluster %s/%s to be suspended", rayCluster.Namespace, rayCluster.Name) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). 
+ Should(WithTransform(StatusCondition(rayv1.RayClusterSuspended), MatchCondition(metav1.ConditionTrue, string(rayv1.RayClusterSuspended)))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionFalse, rayv1.HeadPodNotFound))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionFalse, rayv1.RayClusterPodsProvisioning))) + + rayClusterAC = rayClusterAC.WithSpec(rayClusterAC.Spec.WithSuspend(false)) + rayCluster, err = test.Client().Ray().RayV1().RayClusters(namespace.Name).Apply(test.Ctx(), rayClusterAC, TestApplyOptions) + g.Expect(err).NotTo(HaveOccurred()) + test.T().Logf("Resume RayCluster %s/%s successfully", rayCluster.Namespace, rayCluster.Name) + + test.T().Logf("Waiting for RayCluster %s/%s to be resumed", rayCluster.Namespace, rayCluster.Name) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.RayClusterSuspended), MatchCondition(metav1.ConditionFalse, string(rayv1.RayClusterSuspended)))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). + Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionTrue, rayv1.HeadPodRunningAndReady))) + g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). 
+ Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionTrue, rayv1.AllPodRunningAndReadyFirstTime))) +} diff --git a/ray-operator/test/sampleyaml/raycluster_test.go b/ray-operator/test/sampleyaml/raycluster_test.go index 89aadc1fde..85dfeb51a2 100644 --- a/ray-operator/test/sampleyaml/raycluster_test.go +++ b/ray-operator/test/sampleyaml/raycluster_test.go @@ -121,55 +121,3 @@ func TestRayCluster(t *testing.T) { }) } } - -func TestRayClusterSuspend(t *testing.T) { - tests := []struct { - name string - }{ - { - name: "ray-cluster.complete.yaml", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - test := With(t) - g := NewWithT(t) - - yamlFilePath := path.Join(GetSampleYAMLDir(test), tt.name) - namespace := test.NewTestNamespace() - rayClusterFromYaml := DeserializeRayClusterYAML(test, yamlFilePath) - KubectlApplyYAML(test, yamlFilePath, namespace.Name) - - rayCluster, err := GetRayCluster(test, namespace.Name, rayClusterFromYaml.Name) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(rayCluster).NotTo(BeNil()) - - test.T().Logf("Waiting for RayCluster %s/%s to be ready", namespace.Name, rayCluster.Name) - g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). - Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionTrue, rayv1.HeadPodRunningAndReady))) - g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). - Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionTrue, rayv1.AllPodRunningAndReadyFirstTime))) - rayCluster, err = GetRayCluster(test, namespace.Name, rayCluster.Name) - g.Expect(err).NotTo(HaveOccurred()) - - // Suspend RayCluster - KubectlSetRayClusterSuspend(test, namespace.Name, rayCluster.Name, true) - g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). 
- Should(WithTransform(StatusCondition(rayv1.RayClusterSuspended), MatchCondition(metav1.ConditionTrue, string(rayv1.RayClusterSuspended)))) - g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). - Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionFalse, rayv1.HeadPodNotFound))) - g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). - Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionFalse, rayv1.RayClusterPodsProvisioning))) - - // Resume RayCluster - KubectlSetRayClusterSuspend(test, namespace.Name, rayCluster.Name, false) - g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). - Should(WithTransform(StatusCondition(rayv1.RayClusterSuspended), MatchCondition(metav1.ConditionFalse, string(rayv1.RayClusterSuspended)))) - g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). - Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionTrue, rayv1.HeadPodRunningAndReady))) - g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium). 
- Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionTrue, rayv1.AllPodRunningAndReadyFirstTime))) - }) - } -} diff --git a/ray-operator/test/support/yaml.go b/ray-operator/test/support/yaml.go index 6d6e783230..289b96bc50 100644 --- a/ray-operator/test/support/yaml.go +++ b/ray-operator/test/support/yaml.go @@ -71,15 +71,3 @@ func KubectlDeleteAllPods(t Test, namespace string) { require.NoError(t.T(), err, "Failed to delete pods in %s", namespace) t.T().Logf("Successfully delete pods in %s", namespace) } - -func KubectlSetRayClusterSuspend(t Test, namespace, raycluster string, suspend bool) { - t.T().Helper() - patch := `{"spec":{"suspend":false}}` - if suspend { - patch = `{"spec":{"suspend":true}}` - } - kubectlCmd := exec.CommandContext(t.Ctx(), "kubectl", "patch", "raycluster", raycluster, "-n", namespace, "--type=merge", "-p", patch) - err := kubectlCmd.Run() - require.NoError(t.T(), err, "Failed to set suspend=%t to %s in %s", suspend, raycluster, namespace) - t.T().Logf("Successfully set suspend=%t to %s in %s", suspend, raycluster, namespace) -}