Skip to content

Commit

Permalink
[RayCluster][CI] add e2e tests for RayClusterStatusCondition (#2661)
Browse files Browse the repository at this point in the history
  • Loading branch information
rueian authored Dec 18, 2024
1 parent e595ee4 commit 8362483
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 8 deletions.
3 changes: 2 additions & 1 deletion ray-operator/config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ spec:
containers:
- command:
- /manager
# args:
args:
- --feature-gates=RayClusterStatusConditions=true # this argument can be removed for version >= v1.3 where the feature gate is enabled by default.
# - --enable-leader-election
image: kuberay/operator
imagePullPolicy: IfNotPresent
Expand Down
19 changes: 19 additions & 0 deletions ray-operator/test/sampleyaml/raycluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (

rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
. "github.com/ray-project/kuberay/ray-operator/test/support"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func TestRayCluster(t *testing.T) {
Expand Down Expand Up @@ -76,6 +78,10 @@ func TestRayCluster(t *testing.T) {
g.Expect(rayCluster).NotTo(BeNil())

test.T().Logf("Waiting for RayCluster %s/%s to be ready", namespace.Name, rayCluster.Name)
g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium).
Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionTrue, rayv1.HeadPodRunningAndReady)))
g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium).
Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionTrue, rayv1.AllPodRunningAndReadyFirstTime)))
g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium).
Should(WithTransform(RayClusterState, Equal(rayv1.Ready)))
rayCluster, err = GetRayCluster(test, namespace.Name, rayCluster.Name)
Expand All @@ -99,6 +105,19 @@ func TestRayCluster(t *testing.T) {

// Check that all pods can submit jobs
g.Eventually(SubmitJobsToAllPods(test, rayCluster), TestTimeoutShort).Should(Succeed())

// Delete all pods after setting quota to 0 to avoid recreating pods
KubectlApplyQuota(test, namespace.Name, "--hard=cpu=0,memory=0G,pods=0")
KubectlDeleteAllPods(test, namespace.Name)
// The HeadPodReady condition should now be False with a HeadPodNotFound reason.
g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium).
Should(WithTransform(StatusCondition(rayv1.HeadPodReady), MatchCondition(metav1.ConditionFalse, rayv1.HeadPodNotFound)))
// The RayClusterProvisioned condition should still be True.
g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium).
Should(WithTransform(StatusCondition(rayv1.RayClusterProvisioned), MatchCondition(metav1.ConditionTrue, rayv1.AllPodRunningAndReadyFirstTime)))
// The RayClusterReplicaFailure condition now be True with a FailedCreateHeadPod reason due to the quota limit.
g.Eventually(RayCluster(test, namespace.Name, rayCluster.Name), TestTimeoutMedium).
Should(WithTransform(StatusCondition(rayv1.RayClusterReplicaFailure), MatchCondition(metav1.ConditionTrue, "FailedCreateHeadPod")))
})
}
}
44 changes: 44 additions & 0 deletions ray-operator/test/support/ray.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package support
import (
"errors"

"github.com/onsi/gomega/format"
"github.com/onsi/gomega/types"
"github.com/stretchr/testify/assert"

corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -70,6 +72,48 @@ func RayClusterState(cluster *rayv1.RayCluster) rayv1.ClusterState {
return cluster.Status.State //nolint:staticcheck // https://github.com/ray-project/kuberay/pull/2288
}

func StatusCondition(condType rayv1.RayClusterConditionType) func(*rayv1.RayCluster) metav1.Condition {
return func(cluster *rayv1.RayCluster) metav1.Condition {
if cluster != nil {
for _, cond := range cluster.Status.Conditions {
if cond.Type == string(condType) {
return cond
}
}
}
return metav1.Condition{}
}
}

type ConditionMatcher struct {
expected metav1.Condition
}

func (c *ConditionMatcher) Match(actual interface{}) (success bool, err error) {
if actual == nil {
return false, errors.New("<actual> should be a metav1.Condition but it is nil")
}
a, ok := actual.(metav1.Condition)
if !ok {
return false, errors.New("<actual> should be a metav1.Condition")
}
return a.Reason == c.expected.Reason && a.Status == c.expected.Status, nil
}

func (c *ConditionMatcher) FailureMessage(actual interface{}) (message string) {
a := actual.(metav1.Condition)
return format.Message(a, "to equal", c.expected)
}

func (c *ConditionMatcher) NegatedFailureMessage(actual interface{}) (message string) {
a := actual.(metav1.Condition)
return format.Message(a, "not to equal", c.expected)
}

func MatchCondition(status metav1.ConditionStatus, reason string) types.GomegaMatcher {
return &ConditionMatcher{expected: metav1.Condition{Status: status, Reason: reason}}
}

func RayClusterDesiredWorkerReplicas(cluster *rayv1.RayCluster) int32 {
return cluster.Status.DesiredWorkerReplicas
}
Expand Down
28 changes: 21 additions & 7 deletions ray-operator/test/support/yaml.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"os"
"os/exec"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"k8s.io/apimachinery/pkg/runtime"

Expand All @@ -28,32 +28,46 @@ func DeserializeRayClusterYAML(t Test, filename string) *rayv1.RayCluster {
t.T().Helper()
rayCluster := &rayv1.RayCluster{}
err := deserializeYAML(filename, rayCluster)
assert.NoError(t.T(), err)
require.NoError(t.T(), err, "Fail to deserialize yaml file %s", filename)
return rayCluster
}

func DeserializeRayJobYAML(t Test, filename string) *rayv1.RayJob {
t.T().Helper()
rayJob := &rayv1.RayJob{}
err := deserializeYAML(filename, rayJob)
assert.NoError(t.T(), err)
require.NoError(t.T(), err, "Fail to deserialize yaml file %s", filename)
return rayJob
}

func DeserializeRayServiceYAML(t Test, filename string) *rayv1.RayService {
t.T().Helper()
rayService := &rayv1.RayService{}
err := deserializeYAML(filename, rayService)
assert.NoError(t.T(), err)
require.NoError(t.T(), err, "Fail to deserialize yaml file %s", filename)
return rayService
}

func KubectlApplyYAML(t Test, filename string, namespace string) {
t.T().Helper()
kubectlCmd := exec.CommandContext(t.Ctx(), "kubectl", "apply", "-f", filename, "-n", namespace)
err := kubectlCmd.Run()
if err != nil {
t.T().Fatalf("Failed to apply %s to namespace %s: %v", filename, namespace, err)
}
require.NoError(t.T(), err, "Failed to apply %s to namespace %s", filename, namespace)
t.T().Logf("Successfully applied %s to namespace %s", filename, namespace)
}

func KubectlApplyQuota(t Test, namespace, quota string) {
t.T().Helper()
kubectlCmd := exec.CommandContext(t.Ctx(), "kubectl", "create", "quota", namespace, "-n", namespace, quota)
err := kubectlCmd.Run()
require.NoError(t.T(), err, "Failed to apply quota %s in %s", quota, namespace)
t.T().Logf("Successfully applied quota %s in %s", quota, namespace)
}

func KubectlDeleteAllPods(t Test, namespace string) {
t.T().Helper()
kubectlCmd := exec.CommandContext(t.Ctx(), "kubectl", "delete", "--all", "pods", "-n", namespace)
err := kubectlCmd.Run()
require.NoError(t.T(), err, "Failed to delete pods in %s", namespace)
t.T().Logf("Successfully delete pods in %s", namespace)
}

0 comments on commit 8362483

Please sign in to comment.