Skip to content

Commit

Permalink
add hash to end of resource names to avoid name clash
Browse files Browse the repository at this point in the history
Signed-off-by: Kevin <[email protected]>
  • Loading branch information
KPostOffice committed Aug 21, 2024
1 parent f31a77d commit b1a1fda
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 30 deletions.
67 changes: 59 additions & 8 deletions pkg/controllers/raycluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,11 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return ctrl.Result{}, client.IgnoreNotFound(err)
}

if err := deleteDeprecatedObjects(ctx, r, cluster); err != nil {
logger.Error(err, "Failed to delete deprecated objects")
return ctrl.Result{RequeueAfter: requeueTime}, err
}

if cluster.ObjectMeta.DeletionTimestamp.IsZero() {
if !controllerutil.ContainsFinalizer(cluster, oAuthFinalizer) {
logger.Info("Add a finalizer", "finalizer", oAuthFinalizer)
Expand Down Expand Up @@ -304,7 +309,7 @@ func isMTLSEnabled(cfg *config.KubeRayConfiguration) bool {
}

func crbNameFromCluster(cluster *rayv1.RayCluster) string {
return cluster.Name + "-" + cluster.Namespace + "-auth" // NOTE: potential naming conflicts ie {name: foo, ns: bar-baz} and {name: foo-bar, ns: baz}
return RCCUniqueName(cluster.Name + "-" + cluster.Namespace + "-auth")
}

func desiredOAuthClusterRoleBinding(cluster *rayv1.RayCluster) *rbacv1ac.ClusterRoleBindingApplyConfiguration {
Expand All @@ -326,7 +331,7 @@ func desiredOAuthClusterRoleBinding(cluster *rayv1.RayCluster) *rbacv1ac.Cluster
}

func oauthServiceAccountNameFromCluster(cluster *rayv1.RayCluster) string {
return cluster.Name + "-oauth-proxy"
return RCCUniqueName(cluster.Name + "-oauth-proxy")
}

func desiredServiceAccount(cluster *rayv1.RayCluster) *corev1ac.ServiceAccountApplyConfiguration {
Expand Down Expand Up @@ -363,11 +368,11 @@ func desiredClusterRoute(cluster *rayv1.RayCluster) *routev1ac.RouteApplyConfigu
}

func oauthServiceNameFromCluster(cluster *rayv1.RayCluster) string {
return cluster.Name + "-oauth"
return RCCUniqueName(cluster.Name + "-oauth")
}

func oauthServiceTLSSecretName(cluster *rayv1.RayCluster) string {
return cluster.Name + "-proxy-tls-secret"
return RCCUniqueName(cluster.Name + "-proxy-tls-secret")
}

func desiredOAuthService(cluster *rayv1.RayCluster) *corev1ac.ServiceApplyConfiguration {
Expand All @@ -389,7 +394,7 @@ func desiredOAuthService(cluster *rayv1.RayCluster) *corev1ac.ServiceApplyConfig
}

func oauthSecretNameFromCluster(cluster *rayv1.RayCluster) string {
return cluster.Name + "-oauth-config"
return RCCUniqueName(cluster.Name + "-oauth-config")
}

// desiredOAuthSecret defines the desired OAuth secret object
Expand All @@ -406,7 +411,7 @@ func desiredOAuthSecret(cluster *rayv1.RayCluster, cookieSalt string) *corev1ac.
}

func caSecretNameFromCluster(cluster *rayv1.RayCluster) string {
return "ca-secret-" + cluster.Name
return RCCUniqueName(cluster.Name + "-ca-secret")
}

func desiredCASecret(cluster *rayv1.RayCluster, key, cert []byte) *corev1ac.SecretApplyConfiguration {
Expand Down Expand Up @@ -463,7 +468,9 @@ func generateCACertificate() ([]byte, []byte, error) {
}

func desiredWorkersNetworkPolicy(cluster *rayv1.RayCluster) *networkingv1ac.NetworkPolicyApplyConfiguration {
return networkingv1ac.NetworkPolicy(cluster.Name+"-workers", cluster.Namespace).
return networkingv1ac.NetworkPolicy(
RCCUniqueName(cluster.Name+"-workers"), cluster.Namespace,
).
WithLabels(map[string]string{RayClusterNameLabel: cluster.Name}).
WithSpec(networkingv1ac.NetworkPolicySpec().
WithPodSelector(metav1ac.LabelSelector().WithMatchLabels(map[string]string{"ray.io/cluster": cluster.Name, "ray.io/node-type": "worker"})).
Expand All @@ -484,7 +491,7 @@ func desiredHeadNetworkPolicy(cluster *rayv1.RayCluster, cfg *config.KubeRayConf
if ptr.Deref(cfg.MTLSEnabled, true) {
allSecuredPorts = append(allSecuredPorts, networkingv1ac.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(10001)))
}
return networkingv1ac.NetworkPolicy(cluster.Name+"-head", cluster.Namespace).
return networkingv1ac.NetworkPolicy(RCCUniqueName(cluster.Name+"-head"), cluster.Namespace).
WithLabels(map[string]string{RayClusterNameLabel: cluster.Name}).
WithSpec(networkingv1ac.NetworkPolicySpec().
WithPodSelector(metav1ac.LabelSelector().WithMatchLabels(map[string]string{"ray.io/cluster": cluster.Name, "ray.io/node-type": "head"})).
Expand Down Expand Up @@ -619,3 +626,47 @@ func (r *RayClusterReconciler) SetupWithManager(mgr ctrl.Manager) error {

return controller.Complete(r)
}

func RCCUniqueName(s string) string {
return s + "-" + seededHash(controllerName, s)
}

func deleteDeprecatedObjects(ctx context.Context, r *RayClusterReconciler, cluster *rayv1.RayCluster) error {
// Delete deprecated objects if they exist. These have all been replace by objects with names generated by
// rccUniqueName. This is a temporary measure to clean up old objects that were created before the name generation
// TODO: DELETE THIS FUNCTION AFTER A FEW RELEASES. Current release = v1.7.0 (remove in 1.9.0 or 1.10.0)
logger := ctrl.LoggerFrom(ctx)
if err := r.kubeClient.CoreV1().Secrets(cluster.Namespace).Delete(ctx, cluster.Name+"-oauth-config", metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
logger.Error(err, "Failed to delete oauth secret")
return err
}
if err := r.kubeClient.CoreV1().Secrets(cluster.Namespace).Delete(ctx, "ca-secret-"+cluster.Name, metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
logger.Error(err, "Failed to delete ca secret")
return err
}
if err := r.kubeClient.CoreV1().ServiceAccounts(cluster.Namespace).Delete(ctx, cluster.Name+"-oauth-proxy", metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
logger.Error(err, "Failed to delete SA")
return err
}
if err := r.kubeClient.RbacV1().ClusterRoleBindings().Delete(ctx, cluster.Name+"-"+cluster.Namespace+"-auth", metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
logger.Error(err, "Failed to delete CRB")
return err
}
if err := r.kubeClient.CoreV1().Services(cluster.Namespace).Delete(ctx, cluster.Name+"-oauth", metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
logger.Error(err, "Failed to delete service")
return err
}
if err := r.kubeClient.CoreV1().Secrets(cluster.Namespace).Delete(ctx, cluster.Name+"-proxy-tls-secret", metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
logger.Error(err, "Failed to delete tls secret")
return err
}
if err := r.kubeClient.NetworkingV1().NetworkPolicies(cluster.Namespace).Delete(ctx, cluster.Name+"-workers", metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
logger.Error(err, "Failed to delete worker nwp")
return err
}
if err := r.kubeClient.NetworkingV1().NetworkPolicies(cluster.Namespace).Delete(ctx, cluster.Name+"-head", metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
logger.Error(err, "Failed to delete head nwp")
return err
}
return nil
}
16 changes: 8 additions & 8 deletions pkg/controllers/raycluster_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ func (w *rayClusterWebhook) Default(ctx context.Context, obj runtime.Object) err

rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes = upsert(rayCluster.Spec.HeadGroupSpec.Template.Spec.Volumes, oauthProxyTLSSecretVolume(rayCluster), withVolumeName(oauthProxyVolumeName))

rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = rayCluster.Name + "-oauth-proxy"
rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName = oauthServiceAccountNameFromCluster(rayCluster)
}

if ptr.Deref(w.Config.MTLSEnabled, true) {
Expand Down Expand Up @@ -218,7 +218,7 @@ func validateIngress(rayCluster *rayv1.RayCluster) field.ErrorList {
func validateHeadGroupServiceAccountName(rayCluster *rayv1.RayCluster) field.ErrorList {
var allErrors field.ErrorList

if rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName != rayCluster.Name+"-oauth-proxy" {
if rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName != oauthServiceAccountNameFromCluster(rayCluster) {
allErrors = append(allErrors, field.Invalid(
field.NewPath("spec", "headGroupSpec", "template", "spec", "serviceAccountName"),
rayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName,
Expand All @@ -241,7 +241,7 @@ func oauthProxyContainer(rayCluster *rayv1.RayCluster) corev1.Container {
ValueFrom: &corev1.EnvVarSource{
SecretKeyRef: &corev1.SecretKeySelector{
LocalObjectReference: corev1.LocalObjectReference{
Name: rayCluster.Name + "-oauth-config",
Name: oauthSecretNameFromCluster(rayCluster),
},
Key: "cookie_secret",
},
Expand All @@ -251,7 +251,7 @@ func oauthProxyContainer(rayCluster *rayv1.RayCluster) corev1.Container {
Args: []string{
"--https-address=:8443",
"--provider=openshift",
"--openshift-service-account=" + rayCluster.Name + "-oauth-proxy",
"--openshift-service-account=" + oauthServiceAccountNameFromCluster(rayCluster),
"--upstream=http://localhost:8265",
"--tls-cert=/etc/tls/private/tls.crt",
"--tls-key=/etc/tls/private/tls.key",
Expand All @@ -273,7 +273,7 @@ func oauthProxyTLSSecretVolume(rayCluster *rayv1.RayCluster) corev1.Volume {
Name: oauthProxyVolumeName,
VolumeSource: corev1.VolumeSource{
Secret: &corev1.SecretVolumeSource{
SecretName: rayCluster.Name + "-proxy-tls-secret",
SecretName: oauthServiceTLSSecretName(rayCluster),
},
},
}
Expand Down Expand Up @@ -329,7 +329,7 @@ func caVolumes(rayCluster *rayv1.RayCluster) []corev1.Volume {
Name: "ca-vol",
VolumeSource: corev1.VolumeSource{
Secret: &corev1.SecretVolumeSource{
SecretName: `ca-secret-` + rayCluster.Name,
SecretName: caSecretNameFromCluster(rayCluster),
},
},
},
Expand All @@ -343,9 +343,9 @@ func caVolumes(rayCluster *rayv1.RayCluster) []corev1.Volume {
}

func rayHeadInitContainer(rayCluster *rayv1.RayCluster, config *config.KubeRayConfiguration) corev1.Container {
rayClientRoute := "rayclient-" + rayCluster.Name + "-" + rayCluster.Namespace + "." + config.IngressDomain
rayClientRoute := rayClientNameFromCluster(rayCluster) + "-" + rayCluster.Namespace + "." + config.IngressDomain
// Service name for basic interactive
svcDomain := rayCluster.Name + "-head-svc." + rayCluster.Namespace + ".svc"
svcDomain := serviceNameFromCluster(rayCluster) + "." + rayCluster.Namespace + ".svc"

initContainerHead := corev1.Container{
Name: "create-cert",
Expand Down
42 changes: 28 additions & 14 deletions pkg/controllers/raycluster_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ func TestRayClusterWebhookDefault(t *testing.T) {

t.Run("Expected required service account name for the head group", func(t *testing.T) {
test.Expect(validRayCluster.Spec.HeadGroupSpec.Template.Spec.ServiceAccountName).
To(Equal(validRayCluster.Name+"-oauth-proxy"),
To(Equal(oauthServiceAccountNameFromCluster(validRayCluster)),
"Expected the service account name to be set correctly")
})

Expand Down Expand Up @@ -230,7 +230,13 @@ func TestRayClusterWebhookDefault(t *testing.T) {

func TestValidateCreate(t *testing.T) {
test := support.NewTest(t)

emptyRayCluster := &rayv1.RayCluster{
ObjectMeta: metav1.ObjectMeta{
Name: rayClusterName,
Namespace: namespace,
},
Spec: rayv1.RayClusterSpec{},
}
validRayCluster := &rayv1.RayCluster{
ObjectMeta: metav1.ObjectMeta{
Name: rayClusterName,
Expand All @@ -253,7 +259,7 @@ func TestValidateCreate(t *testing.T) {
ValueFrom: &corev1.EnvVarSource{
SecretKeyRef: &corev1.SecretKeySelector{
LocalObjectReference: corev1.LocalObjectReference{
Name: rayClusterName + "-oauth-config",
Name: oauthSecretNameFromCluster(emptyRayCluster),
},
Key: "cookie_secret",
},
Expand All @@ -263,7 +269,7 @@ func TestValidateCreate(t *testing.T) {
Args: []string{
"--https-address=:8443",
"--provider=openshift",
"--openshift-service-account=" + rayClusterName + "-oauth-proxy",
"--openshift-service-account=" + oauthServiceAccountNameFromCluster(emptyRayCluster),
"--upstream=http://localhost:8265",
"--tls-cert=/etc/tls/private/tls.crt",
"--tls-key=/etc/tls/private/tls.key",
Expand All @@ -284,12 +290,12 @@ func TestValidateCreate(t *testing.T) {
Name: oauthProxyVolumeName,
VolumeSource: corev1.VolumeSource{
Secret: &corev1.SecretVolumeSource{
SecretName: rayClusterName + "-proxy-tls-secret",
SecretName: oauthServiceTLSSecretName(emptyRayCluster),
},
},
},
},
ServiceAccountName: rayClusterName + "-oauth-proxy",
ServiceAccountName: oauthServiceAccountNameFromCluster(emptyRayCluster),
},
},
RayStartParams: map[string]string{},
Expand Down Expand Up @@ -351,7 +357,15 @@ func TestValidateCreate(t *testing.T) {

func TestValidateUpdate(t *testing.T) {
test := support.NewTest(t)

emptyRayCluster := &rayv1.RayCluster{
ObjectMeta: metav1.ObjectMeta{
Name: rayClusterName,
Namespace: namespace,
},
Spec: rayv1.RayClusterSpec{},
}
rayClientRoute := rayClientNameFromCluster(emptyRayCluster) + "-" + emptyRayCluster.Namespace + "." + rcWebhook.Config.IngressDomain
svcDomain := serviceNameFromCluster(emptyRayCluster) + "." + emptyRayCluster.Namespace + ".svc"
validRayCluster := &rayv1.RayCluster{
ObjectMeta: metav1.ObjectMeta{
Name: rayClusterName,
Expand All @@ -374,7 +388,7 @@ func TestValidateUpdate(t *testing.T) {
ValueFrom: &corev1.EnvVarSource{
SecretKeyRef: &corev1.SecretKeySelector{
LocalObjectReference: corev1.LocalObjectReference{
Name: rayClusterName + "-oauth-config",
Name: oauthSecretNameFromCluster(emptyRayCluster),
},
Key: "cookie_secret",
},
Expand All @@ -396,7 +410,7 @@ func TestValidateUpdate(t *testing.T) {
Args: []string{
"--https-address=:8443",
"--provider=openshift",
"--openshift-service-account=" + rayClusterName + "-oauth-proxy",
"--openshift-service-account=" + oauthServiceAccountNameFromCluster(emptyRayCluster),
"--upstream=http://localhost:8265",
"--tls-cert=/etc/tls/private/tls.crt",
"--tls-key=/etc/tls/private/tls.key",
Expand All @@ -419,7 +433,7 @@ func TestValidateUpdate(t *testing.T) {
Command: []string{
"sh",
"-c",
`cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\nDNS.5 = rayclient-` + rayClusterName + `-` + namespace + `.\nDNS.6 = ` + rayClusterName + `-head-svc.` + namespace + `.svc` + `">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext`,
`cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\nDNS.5 = ` + rayClientRoute + `\nDNS.6 = ` + svcDomain + `">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext`,
},
VolumeMounts: []corev1.VolumeMount{
{
Expand All @@ -440,15 +454,15 @@ func TestValidateUpdate(t *testing.T) {
Name: oauthProxyVolumeName,
VolumeSource: corev1.VolumeSource{
Secret: &corev1.SecretVolumeSource{
SecretName: rayClusterName + "-proxy-tls-secret",
SecretName: oauthServiceTLSSecretName(emptyRayCluster),
},
},
},
{
Name: "ca-vol",
VolumeSource: corev1.VolumeSource{
Secret: &corev1.SecretVolumeSource{
SecretName: `ca-secret-` + rayClusterName,
SecretName: caSecretNameFromCluster(emptyRayCluster),
},
},
},
Expand All @@ -459,7 +473,7 @@ func TestValidateUpdate(t *testing.T) {
},
},
},
ServiceAccountName: rayClusterName + "-oauth-proxy",
ServiceAccountName: oauthServiceAccountNameFromCluster(emptyRayCluster),
},
},
RayStartParams: map[string]string{},
Expand Down Expand Up @@ -505,7 +519,7 @@ func TestValidateUpdate(t *testing.T) {
Name: "ca-vol",
VolumeSource: corev1.VolumeSource{
Secret: &corev1.SecretVolumeSource{
SecretName: `ca-secret-` + rayClusterName,
SecretName: caSecretNameFromCluster(emptyRayCluster),
},
},
},
Expand Down
10 changes: 10 additions & 0 deletions pkg/controllers/support.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package controllers

import (
"crypto/sha256"
"fmt"
"os"

"github.com/go-logr/logr"
Expand Down Expand Up @@ -209,3 +211,11 @@ func ownerRefForRayCluster(cluster *rayv1.RayCluster) *v1.OwnerReferenceApplyCon
WithUID(cluster.UID).
WithController(true)
}

var (
hashLength = 8
)

func seededHash(seed string, s string) string {
return fmt.Sprintf("%x", sha256.Sum256([]byte(seed+s)))[:hashLength]
}

0 comments on commit b1a1fda

Please sign in to comment.