From b1a1fda9978a03cd3fd4e892b16b9e39c4858cfd Mon Sep 17 00:00:00 2001 From: Kevin Date: Tue, 6 Aug 2024 14:24:23 -0400 Subject: [PATCH] add hash to end of resource names to avoid name clash Signed-off-by: Kevin --- pkg/controllers/raycluster_controller.go | 67 +++++++++++++++++++--- pkg/controllers/raycluster_webhook.go | 16 +++--- pkg/controllers/raycluster_webhook_test.go | 42 +++++++++----- pkg/controllers/support.go | 10 ++++ 4 files changed, 105 insertions(+), 30 deletions(-) diff --git a/pkg/controllers/raycluster_controller.go b/pkg/controllers/raycluster_controller.go index ba1cb17c..0dc716e4 100644 --- a/pkg/controllers/raycluster_controller.go +++ b/pkg/controllers/raycluster_controller.go @@ -126,6 +126,11 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, client.IgnoreNotFound(err) } + if err := deleteDeprecatedObjects(ctx, r, cluster); err != nil { + logger.Error(err, "Failed to delete deprecated objects") + return ctrl.Result{RequeueAfter: requeueTime}, err + } + if cluster.ObjectMeta.DeletionTimestamp.IsZero() { if !controllerutil.ContainsFinalizer(cluster, oAuthFinalizer) { logger.Info("Add a finalizer", "finalizer", oAuthFinalizer) @@ -304,7 +309,7 @@ func isMTLSEnabled(cfg *config.KubeRayConfiguration) bool { } func crbNameFromCluster(cluster *rayv1.RayCluster) string { - return cluster.Name + "-" + cluster.Namespace + "-auth" // NOTE: potential naming conflicts ie {name: foo, ns: bar-baz} and {name: foo-bar, ns: baz} + return RCCUniqueName(cluster.Name + "-" + cluster.Namespace + "-auth") } func desiredOAuthClusterRoleBinding(cluster *rayv1.RayCluster) *rbacv1ac.ClusterRoleBindingApplyConfiguration { @@ -326,7 +331,7 @@ func desiredOAuthClusterRoleBinding(cluster *rayv1.RayCluster) *rbacv1ac.Cluster } func oauthServiceAccountNameFromCluster(cluster *rayv1.RayCluster) string { - return cluster.Name + "-oauth-proxy" + return RCCUniqueName(cluster.Name + "-oauth-proxy") } func desiredServiceAccount(cluster *rayv1.RayCluster) *corev1ac.ServiceAccountApplyConfiguration { @@ -363,11 +368,11 @@ func desiredClusterRoute(cluster *rayv1.RayCluster) *routev1ac.RouteApplyConfigu } func oauthServiceNameFromCluster(cluster *rayv1.RayCluster) string { - return cluster.Name + "-oauth" + return RCCUniqueName(cluster.Name + "-oauth") } func oauthServiceTLSSecretName(cluster *rayv1.RayCluster) string { - return cluster.Name + "-proxy-tls-secret" + return RCCUniqueName(cluster.Name + "-proxy-tls-secret") } func desiredOAuthService(cluster *rayv1.RayCluster) *corev1ac.ServiceApplyConfiguration { @@ -389,7 +394,7 @@ func desiredOAuthService(cluster *rayv1.RayCluster) *corev1ac.ServiceApplyConfig } func oauthSecretNameFromCluster(cluster *rayv1.RayCluster) string { - return cluster.Name + "-oauth-config" + return RCCUniqueName(cluster.Name + "-oauth-config") } // desiredOAuthSecret defines the desired OAuth secret object @@ -406,7 +411,7 @@ func desiredOAuthSecret(cluster *rayv1.RayCluster, cookieSalt string) *corev1ac. } func caSecretNameFromCluster(cluster *rayv1.RayCluster) string { - return "ca-secret-" + cluster.Name + return RCCUniqueName(cluster.Name + "-ca-secret") } func desiredCASecret(cluster *rayv1.RayCluster, key, cert []byte) *corev1ac.SecretApplyConfiguration { @@ -463,7 +468,9 @@ func generateCACertificate() ([]byte, []byte, error) { } func desiredWorkersNetworkPolicy(cluster *rayv1.RayCluster) *networkingv1ac.NetworkPolicyApplyConfiguration { - return networkingv1ac.NetworkPolicy(cluster.Name+"-workers", cluster.Namespace). + return networkingv1ac.NetworkPolicy( + RCCUniqueName(cluster.Name+"-workers"), cluster.Namespace, + ). WithLabels(map[string]string{RayClusterNameLabel: cluster.Name}). WithSpec(networkingv1ac.NetworkPolicySpec(). WithPodSelector(metav1ac.LabelSelector().WithMatchLabels(map[string]string{"ray.io/cluster": cluster.Name, "ray.io/node-type": "worker"})). @@ -484,7 +491,7 @@ func desiredHeadNetworkPolicy(cluster *rayv1.RayCluster, cfg *config.KubeRayConf if ptr.Deref(cfg.MTLSEnabled, true) { allSecuredPorts = append(allSecuredPorts, networkingv1ac.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(10001))) } - return networkingv1ac.NetworkPolicy(cluster.Name+"-head", cluster.Namespace). + return networkingv1ac.NetworkPolicy(RCCUniqueName(cluster.Name+"-head"), cluster.Namespace). WithLabels(map[string]string{RayClusterNameLabel: cluster.Name}). WithSpec(networkingv1ac.NetworkPolicySpec(). WithPodSelector(metav1ac.LabelSelector().WithMatchLabels(map[string]string{"ray.io/cluster": cluster.Name, "ray.io/node-type": "head"})). @@ -619,3 +626,47 @@ func (r *RayClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { return controller.Complete(r) } + +func RCCUniqueName(s string) string { + return s + "-" + seededHash(controllerName, s) +} + +func deleteDeprecatedObjects(ctx context.Context, r *RayClusterReconciler, cluster *rayv1.RayCluster) error { + // Delete deprecated objects if they exist. These have all been replace by objects with names generated by + // rccUniqueName. This is a temporary measure to clean up old objects that were created before the name generation + // TODO: DELETE THIS FUNCTION AFTER A FEW RELEASES. Current release = v1.7.0 (remove in 1.9.0 or 1.10.0) + logger := ctrl.LoggerFrom(ctx) + if err := r.kubeClient.CoreV1().Secrets(cluster.Namespace).Delete(ctx, cluster.Name+"-oauth-config", metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) { + logger.Error(err, "Failed to delete oauth secret") + return err + } + if err := r.kubeClient.CoreV1().Secrets(cluster.Namespace).Delete(ctx, "ca-secret-"+cluster.Name, metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) { + logger.Error(err, "Failed to delete ca secret") + return err + } + if err := r.kubeClient.CoreV1().ServiceAccounts(cluster.Namespace).Delete(ctx, cluster.Name+"-oauth-proxy", metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) { + logger.Error(err, "Failed to delete SA") + return err + } + if err := r.kubeClient.RbacV1().ClusterRoleBindings().Delete(ctx, cluster.Name+"-"+cluster.Namespace+"-auth", metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) { + logger.Error(err, "Failed to delete CRB") + return err + } + if err := r.kubeClient.CoreV1().Services(cluster.Namespace).Delete(ctx, cluster.Name+"-oauth", metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) { + logger.Error(err, "Failed to delete service") + return err + } + if err := r.kubeClient.CoreV1().Secrets(cluster.Namespace).Delete(ctx, cluster.Name+"-proxy-tls-secret", metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) { + logger.Error(err, "Failed to delete tls secret") + return err + } + if err := r.kubeClient.NetworkingV1().NetworkPolicies(cluster.Namespace).Delete(ctx, cluster.Name+"-workers", metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) { + logger.Error(err, "Failed to delete worker nwp") + return err + } + if err := r.kubeClient.NetworkingV1().NetworkPolicies(cluster.Namespace).Delete(ctx, cluster.Name+"-head", metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) { + logger.Error(err, "Failed to delete head nwp") + return err + } + return nil +} diff --git a/pkg/controllers/raycluster_webhook.go b/pkg/controllers/raycluster_webhook.go index 4e4b259f..21c66277 100644 --- a/pkg/controllers/raycluster_webhook.go +++ b/pkg/controllers/raycluster_webhook.go @@ -74,7 +74,7 @@ func (w *rayClusterWebhook) Default(ctx context.Context, obj runtime.Object) err rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes = upsert(rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes, oauthProxyTLSSecretVolume(rayCluster), withVolumeName(oauthProxyVolumeName)) - rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = rayCluster.Name + "-oauth-proxy" + rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = oauthServiceAccountNameFromCluster(rayCluster) } if ptr.Deref(w.Config.MTLSEnabled, true) { @@ -218,7 +218,7 @@ func validateIngress(rayCluster *rayv1.RayCluster) field.ErrorList { func validateHeadGroupServiceAccountName(rayCluster *rayv1.RayCluster) field.ErrorList { var allErrors field.ErrorList - if rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName != rayCluster.Name+"-oauth-proxy" { + if rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName != oauthServiceAccountNameFromCluster(rayCluster) { allErrors = append(allErrors, field.Invalid( field.NewPath("spec", "headGroupSpec", "template", "spec", "serviceAccountName"), rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName, @@ -241,7 +241,7 @@ func oauthProxyContainer(rayCluster *rayv1.RayCluster) corev1.Container { ValueFrom: &corev1.EnvVarSource{ SecretKeyRef: &corev1.SecretKeySelector{ LocalObjectReference: corev1.LocalObjectReference{ - Name: rayCluster.Name + "-oauth-config", + Name: oauthSecretNameFromCluster(rayCluster), }, Key: "cookie_secret", }, @@ -251,7 +251,7 @@ func oauthProxyContainer(rayCluster *rayv1.RayCluster) corev1.Container { Args: []string{ "--https-address=:8443", "--provider=openshift", - "--openshift-service-account=" + rayCluster.Name + "-oauth-proxy", + "--openshift-service-account=" + oauthServiceAccountNameFromCluster(rayCluster), "--upstream=http://localhost:8265", "--tls-cert=/etc/tls/private/tls.crt", "--tls-key=/etc/tls/private/tls.key", @@ -273,7 +273,7 @@ func oauthProxyTLSSecretVolume(rayCluster *rayv1.RayCluster) corev1.Volume { Name: oauthProxyVolumeName, VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ - SecretName: rayCluster.Name + "-proxy-tls-secret", + SecretName: oauthServiceTLSSecretName(rayCluster), }, }, } @@ -329,7 +329,7 @@ func caVolumes(rayCluster *rayv1.RayCluster) []corev1.Volume { Name: "ca-vol", VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ - SecretName: `ca-secret-` + rayCluster.Name, + SecretName: caSecretNameFromCluster(rayCluster), }, }, }, @@ -343,9 +343,9 @@ func caVolumes(rayCluster *rayv1.RayCluster) []corev1.Volume { } func rayHeadInitContainer(rayCluster *rayv1.RayCluster, config *config.KubeRayConfiguration) corev1.Container { - rayClientRoute := "rayclient-" + rayCluster.Name + "-" + rayCluster.Namespace + "." + config.IngressDomain + rayClientRoute := rayClientNameFromCluster(rayCluster) + "-" + rayCluster.Namespace + "." + config.IngressDomain // Service name for basic interactive - svcDomain := rayCluster.Name + "-head-svc." + rayCluster.Namespace + ".svc" + svcDomain := serviceNameFromCluster(rayCluster) + "." + rayCluster.Namespace + ".svc" initContainerHead := corev1.Container{ Name: "create-cert", diff --git a/pkg/controllers/raycluster_webhook_test.go b/pkg/controllers/raycluster_webhook_test.go index 8e0c375a..2a6b74db 100644 --- a/pkg/controllers/raycluster_webhook_test.go +++ b/pkg/controllers/raycluster_webhook_test.go @@ -124,7 +124,7 @@ func TestRayClusterWebhookDefault(t *testing.T) { t.Run("Expected required service account name for the head group", func(t *testing.T) { test.Expect(validRayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName). - To(Equal(validRayCluster.Name+"-oauth-proxy"), + To(Equal(oauthServiceAccountNameFromCluster(validRayCluster)), "Expected the service account name to be set correctly") }) @@ -230,7 +230,13 @@ func TestRayClusterWebhookDefault(t *testing.T) { func TestValidateCreate(t *testing.T) { test := support.NewTest(t) - + emptyRayCluster := &rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: rayClusterName, + Namespace: namespace, + }, + Spec: rayv1.RayClusterSpec{}, + } validRayCluster := &rayv1.RayCluster{ ObjectMeta: metav1.ObjectMeta{ Name: rayClusterName, @@ -253,7 +259,7 @@ func TestValidateCreate(t *testing.T) { ValueFrom: &corev1.EnvVarSource{ SecretKeyRef: &corev1.SecretKeySelector{ LocalObjectReference: corev1.LocalObjectReference{ - Name: rayClusterName + "-oauth-config", + Name: oauthSecretNameFromCluster(emptyRayCluster), }, Key: "cookie_secret", }, @@ -263,7 +269,7 @@ func TestValidateCreate(t *testing.T) { Args: []string{ "--https-address=:8443", "--provider=openshift", - "--openshift-service-account=" + rayClusterName + "-oauth-proxy", + "--openshift-service-account=" + oauthServiceAccountNameFromCluster(emptyRayCluster), "--upstream=http://localhost:8265", "--tls-cert=/etc/tls/private/tls.crt", "--tls-key=/etc/tls/private/tls.key", @@ -284,12 +290,12 @@ func TestValidateCreate(t *testing.T) { Name: oauthProxyVolumeName, VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ - SecretName: rayClusterName + "-proxy-tls-secret", + SecretName: oauthServiceTLSSecretName(emptyRayCluster), }, }, }, }, - ServiceAccountName: rayClusterName + "-oauth-proxy", + ServiceAccountName: oauthServiceAccountNameFromCluster(emptyRayCluster), }, }, RayStartParams: map[string]string{}, @@ -351,7 +357,15 @@ func TestValidateCreate(t *testing.T) { func TestValidateUpdate(t *testing.T) { test := support.NewTest(t) - + emptyRayCluster := &rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: rayClusterName, + Namespace: namespace, + }, + Spec: rayv1.RayClusterSpec{}, + } + rayClientRoute := rayClientNameFromCluster(emptyRayCluster) + "-" + emptyRayCluster.Namespace + "." + rcWebhook.Config.IngressDomain + svcDomain := serviceNameFromCluster(emptyRayCluster) + "." + emptyRayCluster.Namespace + ".svc" validRayCluster := &rayv1.RayCluster{ ObjectMeta: metav1.ObjectMeta{ Name: rayClusterName, @@ -374,7 +388,7 @@ func TestValidateUpdate(t *testing.T) { ValueFrom: &corev1.EnvVarSource{ SecretKeyRef: &corev1.SecretKeySelector{ LocalObjectReference: corev1.LocalObjectReference{ - Name: rayClusterName + "-oauth-config", + Name: oauthSecretNameFromCluster(emptyRayCluster), }, Key: "cookie_secret", }, @@ -396,7 +410,7 @@ func TestValidateUpdate(t *testing.T) { Args: []string{ "--https-address=:8443", "--provider=openshift", - "--openshift-service-account=" + rayClusterName + "-oauth-proxy", + "--openshift-service-account=" + oauthServiceAccountNameFromCluster(emptyRayCluster), "--upstream=http://localhost:8265", "--tls-cert=/etc/tls/private/tls.crt", "--tls-key=/etc/tls/private/tls.key", @@ -419,7 +433,7 @@ func TestValidateUpdate(t *testing.T) { Command: []string{ "sh", "-c", - `cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\nDNS.5 = rayclient-` + rayClusterName + `-` + namespace + `.\nDNS.6 = ` + rayClusterName + `-head-svc.` + namespace + `.svc` + `">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext`, + `cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\nDNS.5 = ` + rayClientRoute + `\nDNS.6 = ` + svcDomain + `">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext`, }, VolumeMounts: []corev1.VolumeMount{ { @@ -440,7 +454,7 @@ func TestValidateUpdate(t *testing.T) { Name: oauthProxyVolumeName, VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ - SecretName: rayClusterName + "-proxy-tls-secret", + SecretName: oauthServiceTLSSecretName(emptyRayCluster), }, }, }, @@ -448,7 +462,7 @@ func TestValidateUpdate(t *testing.T) { Name: "ca-vol", VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ - SecretName: `ca-secret-` + rayClusterName, + SecretName: caSecretNameFromCluster(emptyRayCluster), }, }, }, @@ -459,7 +473,7 @@ func TestValidateUpdate(t *testing.T) { }, }, }, - ServiceAccountName: rayClusterName + "-oauth-proxy", + ServiceAccountName: oauthServiceAccountNameFromCluster(emptyRayCluster), }, }, RayStartParams: map[string]string{}, @@ -505,7 +519,7 @@ func TestValidateUpdate(t *testing.T) { Name: "ca-vol", VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ - SecretName: `ca-secret-` + rayClusterName, + SecretName: caSecretNameFromCluster(emptyRayCluster), }, }, }, diff --git a/pkg/controllers/support.go b/pkg/controllers/support.go index b2fec8b8..9d236848 100644 --- a/pkg/controllers/support.go +++ b/pkg/controllers/support.go @@ -1,6 +1,8 @@ package controllers import ( + "crypto/sha256" + "fmt" "os" "github.com/go-logr/logr" @@ -209,3 +211,11 @@ func ownerRefForRayCluster(cluster *rayv1.RayCluster) *v1.OwnerReferenceApplyCon WithUID(cluster.UID). WithController(true) } + +var ( + hashLength = 8 +) + +func seededHash(seed string, s string) string { + return fmt.Sprintf("%x", sha256.Sum256([]byte(seed+s)))[:hashLength] +}