From 7f00118fb291fe1c716845ac36495df9e4345188 Mon Sep 17 00:00:00 2001 From: Kevin Date: Tue, 6 Aug 2024 14:24:23 -0400 Subject: [PATCH] add hash to end of resource names to avoid name clash also added a version annotation to raycluster for the CFO version Signed-off-by: Kevin --- main.go | 2 +- pkg/controllers/raycluster_controller.go | 60 ++++- pkg/controllers/raycluster_webhook.go | 34 ++- pkg/controllers/raycluster_webhook_test.go | 296 ++++++++++++++++++--- pkg/controllers/support.go | 10 + 5 files changed, 347 insertions(+), 55 deletions(-) diff --git a/main.go b/main.go index b0b2d0c5..d9e46c22 100644 --- a/main.go +++ b/main.go @@ -248,7 +248,7 @@ func setupRayClusterController(mgr ctrl.Manager, cfg *config.CodeFlareOperatorCo <-certsReady setupLog.Info("Certs ready") - err := controllers.SetupRayClusterWebhookWithManager(mgr, cfg.KubeRay) + err := controllers.SetupRayClusterWebhookWithManager(mgr, cfg.KubeRay, OperatorVersion) if err != nil { return err } diff --git a/pkg/controllers/raycluster_controller.go b/pkg/controllers/raycluster_controller.go index ba1cb17c..902570bb 100644 --- a/pkg/controllers/raycluster_controller.go +++ b/pkg/controllers/raycluster_controller.go @@ -114,6 +114,12 @@ var ( // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.15.3/pkg/reconcile +func shouldUseOldName(cluster *rayv1.RayCluster) bool { + // hashed name code was added in the same commit as the version annotation + _, ok := cluster.GetAnnotations()[versionAnnotation] + return !ok +} + func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := ctrl.LoggerFrom(ctx) @@ -304,7 +310,10 @@ func isMTLSEnabled(cfg *config.KubeRayConfiguration) bool { } func crbNameFromCluster(cluster *rayv1.RayCluster) string { - return cluster.Name + "-" + cluster.Namespace + "-auth" // NOTE: potential naming conflicts ie {name: foo, ns: bar-baz} and {name: foo-bar, ns: baz} + if shouldUseOldName(cluster) { + return cluster.Name + "-" + cluster.Namespace + "-auth" + } + return RCCUniqueName(cluster.Name + "-" + cluster.Namespace + "-auth") } func desiredOAuthClusterRoleBinding(cluster *rayv1.RayCluster) *rbacv1ac.ClusterRoleBindingApplyConfiguration { @@ -326,7 +335,10 @@ func desiredOAuthClusterRoleBinding(cluster *rayv1.RayCluster) *rbacv1ac.Cluster } func oauthServiceAccountNameFromCluster(cluster *rayv1.RayCluster) string { - return cluster.Name + "-oauth-proxy" + if shouldUseOldName(cluster) { + return cluster.Name + "-oauth-proxy" + } + return RCCUniqueName(cluster.Name + "-oauth-proxy") } func desiredServiceAccount(cluster *rayv1.RayCluster) *corev1ac.ServiceAccountApplyConfiguration { @@ -363,11 +375,17 @@ func desiredClusterRoute(cluster *rayv1.RayCluster) *routev1ac.RouteApplyConfigu } func oauthServiceNameFromCluster(cluster *rayv1.RayCluster) string { - return cluster.Name + "-oauth" + if shouldUseOldName(cluster) { + return cluster.Name + "-oauth" + } + return RCCUniqueName(cluster.Name + "-oauth") } func oauthServiceTLSSecretName(cluster *rayv1.RayCluster) string { - return cluster.Name + "-proxy-tls-secret" + if shouldUseOldName(cluster) { + return cluster.Name + "-proxy-tls-secret" + } + return RCCUniqueName(cluster.Name + "-proxy-tls-secret") } func desiredOAuthService(cluster *rayv1.RayCluster) *corev1ac.ServiceApplyConfiguration { @@ -389,7 +407,10 @@ func desiredOAuthService(cluster *rayv1.RayCluster) *corev1ac.ServiceApplyConfig } func oauthSecretNameFromCluster(cluster *rayv1.RayCluster) string { - return cluster.Name + "-oauth-config" + if shouldUseOldName(cluster) { + return cluster.Name + "-oauth-config" + } + return RCCUniqueName(cluster.Name + "-oauth-config") } // desiredOAuthSecret defines the desired OAuth secret object @@ -406,7 +427,10 @@ func desiredOAuthSecret(cluster *rayv1.RayCluster, cookieSalt string) *corev1ac. } func caSecretNameFromCluster(cluster *rayv1.RayCluster) string { - return "ca-secret-" + cluster.Name + if shouldUseOldName(cluster) { + return "ca-secret-" + cluster.Name + } + return RCCUniqueName(cluster.Name + "-ca-secret") } func desiredCASecret(cluster *rayv1.RayCluster, key, cert []byte) *corev1ac.SecretApplyConfiguration { @@ -462,8 +486,17 @@ func generateCACertificate() ([]byte, []byte, error) { return privateKeyPem, certPem, nil } +func workerNWPNameFromCluster(cluster *rayv1.RayCluster) string { + if shouldUseOldName(cluster) { + return cluster.Name + "-workers" + } + return RCCUniqueName(cluster.Name + "-workers") +} + func desiredWorkersNetworkPolicy(cluster *rayv1.RayCluster) *networkingv1ac.NetworkPolicyApplyConfiguration { - return networkingv1ac.NetworkPolicy(cluster.Name+"-workers", cluster.Namespace). + return networkingv1ac.NetworkPolicy( + workerNWPNameFromCluster(cluster), cluster.Namespace, + ). WithLabels(map[string]string{RayClusterNameLabel: cluster.Name}). WithSpec(networkingv1ac.NetworkPolicySpec(). WithPodSelector(metav1ac.LabelSelector().WithMatchLabels(map[string]string{"ray.io/cluster": cluster.Name, "ray.io/node-type": "worker"})). @@ -477,6 +510,13 @@ func desiredWorkersNetworkPolicy(cluster *rayv1.RayCluster) *networkingv1ac.Netw WithOwnerReferences(ownerRefForRayCluster(cluster)) } +func headNWPNameFromCluster(cluster *rayv1.RayCluster) string { + if shouldUseOldName(cluster) { + return cluster.Name + "-head" + } + return RCCUniqueName(cluster.Name + "-head") +} + func desiredHeadNetworkPolicy(cluster *rayv1.RayCluster, cfg *config.KubeRayConfiguration, kubeRayNamespaces []string) *networkingv1ac.NetworkPolicyApplyConfiguration { allSecuredPorts := []*networkingv1ac.NetworkPolicyPortApplyConfiguration{ networkingv1ac.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(8443)), @@ -484,7 +524,7 @@ func desiredHeadNetworkPolicy(cluster *rayv1.RayCluster, cfg *config.KubeRayConf if ptr.Deref(cfg.MTLSEnabled, true) { allSecuredPorts = append(allSecuredPorts, networkingv1ac.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(10001))) } - return networkingv1ac.NetworkPolicy(cluster.Name+"-head", cluster.Namespace). + return networkingv1ac.NetworkPolicy(headNWPNameFromCluster(cluster), cluster.Namespace). WithLabels(map[string]string{RayClusterNameLabel: cluster.Name}). WithSpec(networkingv1ac.NetworkPolicySpec(). WithPodSelector(metav1ac.LabelSelector().WithMatchLabels(map[string]string{"ray.io/cluster": cluster.Name, "ray.io/node-type": "head"})). @@ -619,3 +659,7 @@ func (r *RayClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { return controller.Complete(r) } + +func RCCUniqueName(s string) string { + return s + "-" + seededHash(controllerName, s) +} diff --git a/pkg/controllers/raycluster_webhook.go b/pkg/controllers/raycluster_webhook.go index 4e4b259f..c630bd5b 100644 --- a/pkg/controllers/raycluster_webhook.go +++ b/pkg/controllers/raycluster_webhook.go @@ -38,14 +38,16 @@ const ( oauthProxyContainerName = "oauth-proxy" oauthProxyVolumeName = "proxy-tls-secret" initContainerName = "create-cert" + versionAnnotation = "ray.openshift.ai/version" ) // log is for logging in this package. var rayclusterlog = logf.Log.WithName("raycluster-resource") -func SetupRayClusterWebhookWithManager(mgr ctrl.Manager, cfg *config.KubeRayConfiguration) error { +func SetupRayClusterWebhookWithManager(mgr ctrl.Manager, cfg *config.KubeRayConfiguration, operatorVersion string) error { rayClusterWebhookInstance := &rayClusterWebhook{ - Config: cfg, + Config: cfg, + OperatorVersion: operatorVersion, } return ctrl.NewWebhookManagedBy(mgr). For(&rayv1.RayCluster{}). @@ -58,7 +60,8 @@ func SetupRayClusterWebhookWithManager(mgr ctrl.Manager, cfg *config.KubeRayConf // +kubebuilder:webhook:path=/validate-ray-io-v1-raycluster,mutating=false,failurePolicy=fail,sideEffects=None,groups=ray.io,resources=rayclusters,verbs=create;update,versions=v1,name=vraycluster.ray.openshift.ai,admissionReviewVersions=v1 type rayClusterWebhook struct { - Config *config.KubeRayConfiguration + Config *config.KubeRayConfiguration + OperatorVersion string } var _ webhook.CustomDefaulter = &rayClusterWebhook{} @@ -66,15 +69,24 @@ var _ webhook.CustomValidator = &rayClusterWebhook{} // Default implements webhook.Defaulter so a webhook will be registered for the type func (w *rayClusterWebhook) Default(ctx context.Context, obj runtime.Object) error { + logger := ctrl.LoggerFrom(ctx) rayCluster := obj.(*rayv1.RayCluster) + // add annotation to use new names + annotations := rayCluster.GetAnnotations() + if annotations == nil { + annotations = make(map[string]string) + } + annotations[versionAnnotation] = w.OperatorVersion + rayCluster.SetAnnotations(annotations) + logger.Info("Ray Cluster annotations", "annotations", rayCluster.GetAnnotations()) if ptr.Deref(w.Config.RayDashboardOAuthEnabled, true) { rayclusterlog.V(2).Info("Adding OAuth sidecar container") rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers = upsert(rayCluster.Spec.HeadGroupSpec.Template.Spec.Containers, oauthProxyContainer(rayCluster), withContainerName(oauthProxyContainerName)) rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes = upsert(rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes, oauthProxyTLSSecretVolume(rayCluster), withVolumeName(oauthProxyVolumeName)) - rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = rayCluster.Name + "-oauth-proxy" + rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = oauthServiceAccountNameFromCluster(rayCluster) } if ptr.Deref(w.Config.MTLSEnabled, true) { @@ -218,7 +230,7 @@ func validateIngress(rayCluster *rayv1.RayCluster) field.ErrorList { func validateHeadGroupServiceAccountName(rayCluster *rayv1.RayCluster) field.ErrorList { var allErrors field.ErrorList - if rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName != rayCluster.Name+"-oauth-proxy" { + if rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName != oauthServiceAccountNameFromCluster(rayCluster) { allErrors = append(allErrors, field.Invalid( field.NewPath("spec", "headGroupSpec", "template", "spec", "serviceAccountName"), rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName, @@ -241,7 +253,7 @@ func oauthProxyContainer(rayCluster *rayv1.RayCluster) corev1.Container { ValueFrom: &corev1.EnvVarSource{ SecretKeyRef: &corev1.SecretKeySelector{ LocalObjectReference: corev1.LocalObjectReference{ - Name: rayCluster.Name + "-oauth-config", + Name: oauthSecretNameFromCluster(rayCluster), }, Key: "cookie_secret", }, @@ -251,7 +263,7 @@ func oauthProxyContainer(rayCluster *rayv1.RayCluster) corev1.Container { Args: []string{ "--https-address=:8443", "--provider=openshift", - "--openshift-service-account=" + rayCluster.Name + "-oauth-proxy", + "--openshift-service-account=" + oauthServiceAccountNameFromCluster(rayCluster), "--upstream=http://localhost:8265", "--tls-cert=/etc/tls/private/tls.crt", "--tls-key=/etc/tls/private/tls.key", @@ -273,7 +285,7 @@ func oauthProxyTLSSecretVolume(rayCluster *rayv1.RayCluster) corev1.Volume { Name: oauthProxyVolumeName, VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ - SecretName: rayCluster.Name + "-proxy-tls-secret", + SecretName: oauthServiceTLSSecretName(rayCluster), }, }, } @@ -329,7 +341,7 @@ func caVolumes(rayCluster *rayv1.RayCluster) []corev1.Volume { Name: "ca-vol", VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ - SecretName: `ca-secret-` + rayCluster.Name, + SecretName: caSecretNameFromCluster(rayCluster), }, }, }, @@ -343,9 +355,9 @@ func caVolumes(rayCluster *rayv1.RayCluster) []corev1.Volume { } func rayHeadInitContainer(rayCluster *rayv1.RayCluster, config *config.KubeRayConfiguration) corev1.Container { - rayClientRoute := "rayclient-" + rayCluster.Name + "-" + rayCluster.Namespace + "." + config.IngressDomain + rayClientRoute := rayClientNameFromCluster(rayCluster) + "-" + rayCluster.Namespace + "." + config.IngressDomain // Service name for basic interactive - svcDomain := rayCluster.Name + "-head-svc." + rayCluster.Namespace + ".svc" + svcDomain := serviceNameFromCluster(rayCluster) + "." + rayCluster.Namespace + ".svc" initContainerHead := corev1.Container{ Name: "create-cert", diff --git a/pkg/controllers/raycluster_webhook_test.go b/pkg/controllers/raycluster_webhook_test.go index 8e0c375a..d1c798a8 100644 --- a/pkg/controllers/raycluster_webhook_test.go +++ b/pkg/controllers/raycluster_webhook_test.go @@ -35,7 +35,8 @@ var ( rayClusterName = "test-raycluster" rcWebhook = &rayClusterWebhook{ - Config: &config.KubeRayConfiguration{}, + Config: &config.KubeRayConfiguration{}, + OperatorVersion: "0.0.0", } ) @@ -124,7 +125,7 @@ func TestRayClusterWebhookDefault(t *testing.T) { t.Run("Expected required service account name for the head group", func(t *testing.T) { test.Expect(validRayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName). - To(Equal(validRayCluster.Name+"-oauth-proxy"), + To(Equal(validRayCluster.Name+"-oauth-proxy-ecaffa62"), "Expected the service account name to be set correctly") }) @@ -230,11 +231,47 @@ func TestRayClusterWebhookDefault(t *testing.T) { func TestValidateCreate(t *testing.T) { test := support.NewTest(t) + minimalRayCluster := &rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: rayClusterName, + Namespace: namespace, + }, + Spec: rayv1.RayClusterSpec{ + HeadGroupSpec: rayv1.HeadGroupSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "head"}, + }, + }, + }, + RayStartParams: map[string]string{}, + }, + WorkerGroupSpecs: []rayv1.WorkerGroupSpec{ + { + GroupName: "worker-group-1", + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "worker-container-1", + }, + }, + }, + }, + RayStartParams: map[string]string{}, + }, + }, + }, + } validRayCluster := &rayv1.RayCluster{ ObjectMeta: metav1.ObjectMeta{ Name: rayClusterName, Namespace: namespace, + Annotations: map[string]string{ + versionAnnotation: "0.0.0", + }, }, Spec: rayv1.RayClusterSpec{ HeadGroupSpec: rayv1.HeadGroupSpec{ @@ -253,7 +290,7 @@ func TestValidateCreate(t *testing.T) { ValueFrom: &corev1.EnvVarSource{ SecretKeyRef: &corev1.SecretKeySelector{ LocalObjectReference: corev1.LocalObjectReference{ - Name: rayClusterName + "-oauth-config", + Name: rayClusterName + "-oauth-config-01e408a7", }, Key: "cookie_secret", }, @@ -263,7 +300,7 @@ func TestValidateCreate(t *testing.T) { Args: []string{ "--https-address=:8443", "--provider=openshift", - "--openshift-service-account=" + rayClusterName + "-oauth-proxy", + "--openshift-service-account=" + rayClusterName + "-oauth-proxy-ecaffa62", "--upstream=http://localhost:8265", "--tls-cert=/etc/tls/private/tls.crt", "--tls-key=/etc/tls/private/tls.key", @@ -284,12 +321,12 @@ func TestValidateCreate(t *testing.T) { Name: oauthProxyVolumeName, VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ - SecretName: rayClusterName + "-proxy-tls-secret", + SecretName: rayClusterName + "-proxy-tls-secret-3e5a0266", }, }, }, }, - ServiceAccountName: rayClusterName + "-oauth-proxy", + ServiceAccountName: rayClusterName + "-oauth-proxy-ecaffa62", }, }, RayStartParams: map[string]string{}, @@ -298,12 +335,18 @@ func TestValidateCreate(t *testing.T) { } // Create the RayClusters - if _, err := test.Client().Ray().RayV1().RayClusters(namespace).Create(test.Ctx(), validRayCluster, metav1.CreateOptions{}); err != nil { + if _, err := test.Client().Ray().RayV1().RayClusters(namespace).Create(test.Ctx(), minimalRayCluster, metav1.CreateOptions{}); err != nil { test.T().Fatalf("Failed to create RayCluster: %v", err) } + t.Run("Expect updated values in minimal RayCluster", func(t *testing.T) { + err := rcWebhook.Default(test.Ctx(), runtime.Object(minimalRayCluster)) + test.Expect(err).ShouldNot(HaveOccurred(), "Expected no errors on call to Default function") + test.Expect(minimalRayCluster.GetAnnotations()[versionAnnotation]).ShouldNot(BeNil(), "Expected version annotation to be set") + }) + // Call to ValidateCreate function is made - warnings, err := rcWebhook.ValidateCreate(test.Ctx(), runtime.Object(validRayCluster)) + warnings, err := rcWebhook.ValidateCreate(test.Ctx(), runtime.Object(minimalRayCluster)) t.Run("Expected no warnings or errors on call to ValidateCreate function", func(t *testing.T) { test.Expect(warnings).Should(BeNil(), "Expected no warnings on call to ValidateCreate function") test.Expect(err).ShouldNot(HaveOccurred(), "Expected no errors on call to ValidateCreate function") @@ -351,8 +394,184 @@ func TestValidateCreate(t *testing.T) { func TestValidateUpdate(t *testing.T) { test := support.NewTest(t) + rayClientRoute := "rayclient-" + rayClusterName + "-" + namespace + "." + rcWebhook.Config.IngressDomain + svcDomain := rayClusterName + "-head-svc." + namespace + ".svc" + validRayClusterNewNames := &rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: rayClusterName, + Namespace: namespace, + Annotations: map[string]string{ + versionAnnotation: "0.0.0", + }, + }, + Spec: rayv1.RayClusterSpec{ + HeadGroupSpec: rayv1.HeadGroupSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: oauthProxyContainerName, + Image: OAuthProxyImage, + Ports: []corev1.ContainerPort{ + {ContainerPort: 8443, Name: "oauth-proxy"}, + }, + Env: []corev1.EnvVar{ + { + Name: "COOKIE_SECRET", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: rayClusterName + "-oauth-config-01e408a7", + }, + Key: "cookie_secret", + }, + }, + }, + { + Name: "MY_POD_IP", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "status.podIP", + }, + }, + }, + {Name: "RAY_USE_TLS", Value: "1"}, + {Name: "RAY_TLS_SERVER_CERT", Value: "/home/ray/workspace/tls/server.crt"}, + {Name: "RAY_TLS_SERVER_KEY", Value: "/home/ray/workspace/tls/server.key"}, + {Name: "RAY_TLS_CA_CERT", Value: "/home/ray/workspace/tls/ca.crt"}, + }, + Args: []string{ + "--https-address=:8443", + "--provider=openshift", + "--openshift-service-account=" + rayClusterName + "-oauth-proxy-ecaffa62", + "--upstream=http://localhost:8265", + "--tls-cert=/etc/tls/private/tls.crt", + "--tls-key=/etc/tls/private/tls.key", + "--cookie-secret=$(COOKIE_SECRET)", + "--openshift-delegate-urls={\"/\":{\"resource\":\"pods\",\"namespace\":\"" + namespace + "\",\"verb\":\"get\"}}", + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: oauthProxyVolumeName, + MountPath: "/etc/tls/private", + ReadOnly: true, + }, + }, + }, + }, + InitContainers: []corev1.Container{ + { + Name: "create-cert", + Image: "registry.redhat.io/ubi9@sha256:770cf07083e1c85ae69c25181a205b7cdef63c11b794c89b3b487d4670b4c328", + Command: []string{ + "sh", + "-c", + `cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\nDNS.5 = ` + rayClientRoute + `\nDNS.6 = ` + svcDomain + `">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext`, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "ca-vol", + MountPath: "/home/ray/workspace/ca", + ReadOnly: true, + }, + { + Name: "server-cert", + MountPath: "/home/ray/workspace/tls", + ReadOnly: false, + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: oauthProxyVolumeName, + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: rayClusterName + "-proxy-tls-secret-3e5a0266", + }, + }, + }, + { + Name: "ca-vol", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: rayClusterName + "-ca-secret-001432aa", + }, + }, + }, + { + Name: "server-cert", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }, + }, + ServiceAccountName: rayClusterName + "-oauth-proxy-ecaffa62", + }, + }, + RayStartParams: map[string]string{}, + }, + WorkerGroupSpecs: []rayv1.WorkerGroupSpec{ + { + GroupName: "worker-group-1", + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "worker-container-1", + Env: []corev1.EnvVar{ + { + Name: "MY_POD_IP", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "status.podIP", + }, + }, + }, + {Name: "RAY_USE_TLS", Value: "1"}, + {Name: "RAY_TLS_SERVER_CERT", Value: "/home/ray/workspace/tls/server.crt"}, + {Name: "RAY_TLS_SERVER_KEY", Value: "/home/ray/workspace/tls/server.key"}, + {Name: "RAY_TLS_CA_CERT", Value: "/home/ray/workspace/tls/ca.crt"}, + }, + }, + }, + InitContainers: []corev1.Container{ + { + Name: "create-cert", + Image: "registry.redhat.io/ubi9@sha256:770cf07083e1c85ae69c25181a205b7cdef63c11b794c89b3b487d4670b4c328", + Command: []string{ + "sh", + "-c", + `cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext`, + }, + VolumeMounts: certVolumeMounts(), + }, + }, + Volumes: []corev1.Volume{ + { + Name: "ca-vol", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: rayClusterName + "-ca-secret-001432aa", + }, + }, + }, + { + Name: "server-cert", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }, + }, + }, + }, + RayStartParams: map[string]string{}, + }, + }, + }, + } - validRayCluster := &rayv1.RayCluster{ + validRayClusterOldNames := &rayv1.RayCluster{ ObjectMeta: metav1.ObjectMeta{ Name: rayClusterName, Namespace: namespace, @@ -419,7 +638,7 @@ func TestValidateUpdate(t *testing.T) { Command: []string{ "sh", "-c", - `cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\nDNS.5 = rayclient-` + rayClusterName + `-` + namespace + `.\nDNS.6 = ` + rayClusterName + `-head-svc.` + namespace + `.svc` + `">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext`, + `cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\nDNS.5 = ` + rayClientRoute + `\nDNS.6 = ` + svcDomain + `">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext`, }, VolumeMounts: []corev1.VolumeMount{ { @@ -448,7 +667,7 @@ func TestValidateUpdate(t *testing.T) { Name: "ca-vol", VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ - SecretName: `ca-secret-` + rayClusterName, + SecretName: "ca-secret-" + rayClusterName, }, }, }, @@ -505,7 +724,7 @@ func TestValidateUpdate(t *testing.T) { Name: "ca-vol", VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ - SecretName: `ca-secret-` + rayClusterName, + SecretName: "ca-secret-" + rayClusterName, }, }, }, @@ -525,69 +744,76 @@ func TestValidateUpdate(t *testing.T) { } // Create the RayClusters - if _, err := test.Client().Ray().RayV1().RayClusters(namespace).Create(test.Ctx(), validRayCluster, metav1.CreateOptions{}); err != nil { + if _, err := test.Client().Ray().RayV1().RayClusters(namespace).Create(test.Ctx(), validRayClusterNewNames, metav1.CreateOptions{}); err != nil { test.T().Fatalf("Failed to create RayCluster: %v", err) } // Positive Test Case: Valid RayCluster with immutable fields - t.Run("Expected no warnings or errors on call to ValidateUpdate function", func(t *testing.T) { - warnings, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayCluster), runtime.Object(validRayCluster)) + t.Run("Expected no warnings or errors on call to ValidateUpdate function with version annotation set", func(t *testing.T) { + test.Expect(runtime.Object(validRayClusterNewNames).(*rayv1.RayCluster).Annotations).ShouldNot(BeNil(), "Expected version annotation to be set") + warnings, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayClusterNewNames), runtime.Object(validRayClusterNewNames)) + test.Expect(warnings).Should(BeNil(), "Expected no warnings on call to ValidateUpdate function") + test.Expect(err).ShouldNot(HaveOccurred(), "Expected no errors on call to ValidateUpdate function") + }) + + t.Run("Expected no warnings or errors on call to ValidateUpdate function with version annotation unset", func(t *testing.T) { + warnings, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayClusterOldNames), runtime.Object(validRayClusterOldNames)) test.Expect(warnings).Should(BeNil(), "Expected no warnings on call to ValidateUpdate function") test.Expect(err).ShouldNot(HaveOccurred(), "Expected no errors on call to ValidateUpdate function") }) t.Run("Negative: Expected errors on call to ValidateUpdate function due to EnableIngress set to True", func(t *testing.T) { - invalidRayCluster := validRayCluster.DeepCopy() + invalidRayCluster := validRayClusterNewNames.DeepCopy() invalidRayCluster.Spec.HeadGroupSpec.EnableIngress = support.Ptr(true) - _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayCluster), runtime.Object(invalidRayCluster)) + _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayClusterNewNames), runtime.Object(invalidRayCluster)) test.Expect(err).Should(HaveOccurred(), "Expected errors on call to ValidateUpdate function due to EnableIngress set to True") }) t.Run("Negative: Expected errors on call to ValidateUpdate function due to manipulated OAuth Proxy Container", func(t *testing.T) { - invalidRayCluster := validRayCluster.DeepCopy() + invalidRayCluster := validRayClusterNewNames.DeepCopy() for i, headContainer := range invalidRayCluster.Spec.HeadGroupSpec.Template.Spec.Containers { if headContainer.Name == oauthProxyContainerName { invalidRayCluster.Spec.HeadGroupSpec.Template.Spec.Containers[i].Args = []string{"--invalid-arg"} break } } - _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayCluster), runtime.Object(invalidRayCluster)) + _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayClusterNewNames), runtime.Object(invalidRayCluster)) test.Expect(err).Should(HaveOccurred(), "Expected errors on call to ValidateUpdate function due to manipulated OAuth Proxy Container") }) t.Run("Negative: Expected errors on call to ValidateUpdate function due to manipulated OAuth Proxy Volume", func(t *testing.T) { - invalidRayCluster := validRayCluster.DeepCopy() + invalidRayCluster := validRayClusterNewNames.DeepCopy() for i, headVolume := range invalidRayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes { if headVolume.Name == oauthProxyVolumeName { invalidRayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes[i].Secret.SecretName = "invalid-secret-name" break } } - _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayCluster), runtime.Object(invalidRayCluster)) + _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayClusterNewNames), runtime.Object(invalidRayCluster)) test.Expect(err).Should(HaveOccurred(), "Expected errors on call to ValidateUpdate function due to manipulated OAuth Proxy Volume") }) t.Run("Negative: Expected errors on call to ValidateUpdate function due to manipulated head group service account name", func(t *testing.T) { - invalidRayCluster := validRayCluster.DeepCopy() + invalidRayCluster := validRayClusterNewNames.DeepCopy() invalidRayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = "invalid-service-account-name" - _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayCluster), runtime.Object(invalidRayCluster)) + _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayClusterNewNames), runtime.Object(invalidRayCluster)) test.Expect(err).Should(HaveOccurred(), "Expected errors on call to ValidateUpdate function due to manipulated head group service account name") }) t.Run("Negative: Expected errors on call to ValidateUpdate function due to manipulated Init Container in the head group", func(t *testing.T) { - invalidRayCluster := validRayCluster.DeepCopy() + invalidRayCluster := validRayClusterNewNames.DeepCopy() for i, headInitContainer := range invalidRayCluster.Spec.HeadGroupSpec.Template.Spec.InitContainers { if headInitContainer.Name == "create-cert" { invalidRayCluster.Spec.HeadGroupSpec.Template.Spec.InitContainers[i].Command = []string{"manipulated command"} break } } - _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayCluster), runtime.Object(invalidRayCluster)) + _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayClusterNewNames), runtime.Object(invalidRayCluster)) test.Expect(err).Should(HaveOccurred(), "Expected errors on call to ValidateUpdate function due to manipulated Init Container in the head group") }) t.Run("Negative: Expected errors on call to ValidateUpdate function due to manipulated Init Container in the worker group", func(t *testing.T) { - invalidRayCluster := validRayCluster.DeepCopy() + invalidRayCluster := validRayClusterNewNames.DeepCopy() for _, workerGroup := range invalidRayCluster.Spec.WorkerGroupSpecs { for i, workerInitContainer := range workerGroup.Template.Spec.InitContainers { if workerInitContainer.Name == "create-cert" { @@ -596,24 +822,24 @@ func TestValidateUpdate(t *testing.T) { } } } - _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayCluster), runtime.Object(invalidRayCluster)) + _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayClusterNewNames), runtime.Object(invalidRayCluster)) test.Expect(err).Should(HaveOccurred(), "Expected errors on call to ValidateUpdate function due to manipulated Init Container in the worker group") }) t.Run("Negative: Expected errors on call to ValidateUpdate function due to manipulated Volume in the head group", func(t *testing.T) { - invalidRayCluster := validRayCluster.DeepCopy() + invalidRayCluster := validRayClusterNewNames.DeepCopy() for i, headVolume := range invalidRayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes { if headVolume.Name == "ca-vol" { invalidRayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes[i].Secret.SecretName = "invalid-secret-name" break } } - _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayCluster), runtime.Object(invalidRayCluster)) + _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayClusterNewNames), runtime.Object(invalidRayCluster)) test.Expect(err).Should(HaveOccurred(), "Expected errors on call to ValidateUpdate function due to manipulated Volume in the head group") }) t.Run("Negative: Expected errors on call to ValidateUpdate function due to manipulated Volume in the worker group", func(t *testing.T) { - invalidRayCluster := validRayCluster.DeepCopy() + invalidRayCluster := validRayClusterNewNames.DeepCopy() for _, workerGroup := range invalidRayCluster.Spec.WorkerGroupSpecs { for i, workerVolume := range workerGroup.Template.Spec.Volumes { if workerVolume.Name == "ca-vol" { @@ -622,24 +848,24 @@ func TestValidateUpdate(t *testing.T) { } } } - _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayCluster), runtime.Object(invalidRayCluster)) + _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayClusterNewNames), runtime.Object(invalidRayCluster)) test.Expect(err).Should(HaveOccurred(), "Expected errors on call to ValidateUpdate function due to manipulated Volume in the worker group") }) t.Run("Negative: Expected errors on call to ValidateUpdate function due to manipulated env vars in the head group", func(t *testing.T) { - invalidRayCluster := validRayCluster.DeepCopy() + invalidRayCluster := validRayClusterNewNames.DeepCopy() for i, headEnvVar := range invalidRayCluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Env { if headEnvVar.Name == "RAY_USE_TLS" { invalidRayCluster.Spec.HeadGroupSpec.Template.Spec.Containers[0].Env[i].Value = "invalid-value" break } } - _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayCluster), runtime.Object(invalidRayCluster)) + _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayClusterNewNames), runtime.Object(invalidRayCluster)) test.Expect(err).Should(HaveOccurred(), "Expected errors on call to ValidateUpdate function due to manipulated env vars in the head group") }) t.Run("Negative: Expected errors on call to ValidateUpdate function due to manipulated env vars in the worker group", func(t *testing.T) { - invalidRayCluster := validRayCluster.DeepCopy() + invalidRayCluster := validRayClusterNewNames.DeepCopy() for _, workerGroup := range invalidRayCluster.Spec.WorkerGroupSpecs { for i, workerEnvVar := range workerGroup.Template.Spec.Containers[0].Env { if workerEnvVar.Name == "RAY_USE_TLS" { @@ -648,7 +874,7 @@ func TestValidateUpdate(t *testing.T) { } } } - _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayCluster), runtime.Object(invalidRayCluster)) + _, err := rcWebhook.ValidateUpdate(test.Ctx(), runtime.Object(validRayClusterNewNames), runtime.Object(invalidRayCluster)) test.Expect(err).Should(HaveOccurred(), "Expected errors on call to ValidateUpdate function due to manipulated env vars in the worker group") }) } diff --git a/pkg/controllers/support.go b/pkg/controllers/support.go index b2fec8b8..9d236848 100644 --- a/pkg/controllers/support.go +++ b/pkg/controllers/support.go @@ -1,6 +1,8 @@ package controllers import ( + "crypto/sha256" + "fmt" "os" "github.com/go-logr/logr" @@ -209,3 +211,11 @@ func ownerRefForRayCluster(cluster *rayv1.RayCluster) *v1.OwnerReferenceApplyCon WithUID(cluster.UID). WithController(true) } + +var ( + hashLength = 8 +) + +func seededHash(seed string, s string) string { + return fmt.Sprintf("%x", sha256.Sum256([]byte(seed+s)))[:hashLength] +}