diff --git a/Dockerfile b/Dockerfile index f5c51c3f..26f4c23d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,6 +10,7 @@ RUN go mod download # Copy the Go sources COPY main.go main.go COPY pkg/ pkg/ +COPY controllers/ controllers/ # Build USER root diff --git a/PROJECT b/PROJECT index 57b3ef28..cb690308 100644 --- a/PROJECT +++ b/PROJECT @@ -1,3 +1,7 @@ +# Code generated by tool. DO NOT EDIT. +# This file is used to track the info used to scaffold your project +# and allow the plugins properly work. +# More info: https://book.kubebuilder.io/reference/project-config.html domain: codeflare.dev layout: - go.kubebuilder.io/v3 @@ -6,4 +10,10 @@ plugins: scorecard.sdk.operatorframework.io/v2: {} projectName: codeflare-operator repo: github.com/project-codeflare/codeflare-operator +resources: +- controller: true + domain: codeflare.dev + group: ray + kind: RayCluster + version: v1 version: "3" diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml index a74a16f1..12d4bb24 100644 --- a/config/rbac/kustomization.yaml +++ b/config/rbac/kustomization.yaml @@ -7,6 +7,8 @@ resources: - admin_role.yaml - editor_role.yaml - service_account.yaml +- mcad_manager_role.yaml +- mcad_manager_role_binding.yaml - role.yaml - role_binding.yaml - instascale_role.yaml diff --git a/config/rbac/mcad_manager_role.yaml b/config/rbac/mcad_manager_role.yaml new file mode 100644 index 00000000..b414b8e2 --- /dev/null +++ b/config/rbac/mcad_manager_role.yaml @@ -0,0 +1,223 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + creationTimestamp: null + name: manual-manager-role +rules: +- apiGroups: + - '*' + resources: + - deployments + - services + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - batch + resources: + - jobs + verbs: + - create + - delete + - list + - patch + - update + - watch +- apiGroups: + - apps + resources: + - deployments + - replicasets + - statefulsets + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +- apiGroups: + - config.openshift.io + resources: + - clusterversions + verbs: + - get + - list +- apiGroups: + - coordination.k8s.io + resources: + - kube-scheduler + - leases + verbs: + - create + - get + - update +- apiGroups: + - "" + resources: + - bindings + - pods/binding + verbs: + - create +- apiGroups: + - "" + resources: + - configmaps + - nodes + - persistentvolumeclaims + - persistentvolumes + - secrets + - serviceaccounts + - services + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - "" + resources: + - endpoints + - kube-scheduler + verbs: + - create + - get + - update +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - update +- apiGroups: + - "" + resources: + - kube-scheduler + verbs: + - get + - update +- apiGroups: + - "" + resources: + - pods + verbs: + - create + - delete + - deletecollection + - get + - list + - patch + - update + - watch +- apiGroups: + - "" + resources: + - pods/status + verbs: + - patch + - update +- apiGroups: + - "" + resources: + - replicationcontrollers + verbs: + - get + - list + - watch +- apiGroups: + - events.k8s.io + resources: + - events + - kube-scheduler + verbs: + - create + - patch + - update +- apiGroups: + - machine.openshift.io + resources: + - '*' + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - scheduling.sigs.k8s.io + resources: + - podgroups + verbs: + - create + - delete + - deletecollection + - get + - list + - patch + - update + - watch +- apiGroups: + - storage.k8s.io + resources: + - csidrivers + - csinodes + - csistoragecapacities + verbs: + - get + - list + - watch +- apiGroups: + - workload.codeflare.dev + resources: + - appwrappers + - appwrappers/finalizers + - appwrappers/status + - schedulingspecs + verbs: + - create + - delete + - deletecollection + - get + - list + - patch + - update + - watch +- apiGroups: + - quota.codeflare.dev + resources: + - quotasubtrees + verbs: + - create + - delete + - deletecollection + - get + - list + - patch + - update + - watch diff --git a/config/rbac/mcad_manager_role_binding.yaml b/config/rbac/mcad_manager_role_binding.yaml new file mode 100644 index 00000000..af6c74ae --- /dev/null +++ b/config/rbac/mcad_manager_role_binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: manual-manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: manual-manager-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 07954b37..c7ef78b4 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -5,219 +5,62 @@ metadata: creationTimestamp: null name: manager-role rules: -- apiGroups: - - '*' - resources: - - deployments - - services - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - batch - resources: - - jobs - verbs: - - create - - delete - - list - - patch - - update - - watch -- apiGroups: - - apps - resources: - - deployments - - replicasets - - statefulsets - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - authentication.k8s.io - resources: - - tokenreviews - verbs: - - create -- apiGroups: - - authorization.k8s.io - resources: - - subjectaccessreviews - verbs: - - create -- apiGroups: - - config.openshift.io - resources: - - clusterversions - verbs: - - get - - list -- apiGroups: - - coordination.k8s.io - resources: - - kube-scheduler - - leases - verbs: - - create - - get - - update - apiGroups: - "" resources: - - bindings - - pods/binding - verbs: - - create -- apiGroups: - - "" - resources: - - configmaps - - nodes - - persistentvolumeclaims - - persistentvolumes - secrets - - serviceaccounts - - services verbs: - create - delete - get - - list - patch - - update - - watch - apiGroups: - "" resources: - - endpoints - - kube-scheduler - verbs: - - create - - get - - update -- apiGroups: - - "" - resources: - - events - verbs: - - create - - patch - - update -- apiGroups: - - "" - resources: - - kube-scheduler - verbs: - - get - - update -- apiGroups: - - "" - resources: - - pods + - serviceaccounts verbs: - - create - delete - - deletecollection - get - - list - patch - - update - - watch -- apiGroups: - - "" - resources: - - pods/status - verbs: - - patch - - update - apiGroups: - "" resources: - - replicationcontrollers - verbs: - - get - - list - - watch -- apiGroups: - - events.k8s.io - resources: - - events - - kube-scheduler - verbs: - - create - - patch - - update -- apiGroups: - - machine.openshift.io - resources: - - '*' + - services verbs: - - create - delete - get - - list - patch - - update - - watch - apiGroups: - - scheduling.sigs.k8s.io + - ray.io resources: - - podgroups + - rayclusters verbs: - create - delete - - deletecollection - get - list - patch - update - watch - apiGroups: - - storage.k8s.io + - ray.io resources: - - csidrivers - - csinodes - - csistoragecapacities + - rayclusters/finalizers verbs: - - get - - list - - watch + - update - apiGroups: - - workload.codeflare.dev + - ray.io resources: - - appwrappers - - appwrappers/finalizers - - appwrappers/status - - schedulingspecs + - rayclusters/status verbs: - - create - - delete - - deletecollection - get - - list - patch - update - - watch - apiGroups: - - quota.codeflare.dev + - route.openshift.io resources: - - quotasubtrees + - routes verbs: - - create - delete - - deletecollection - get - - list - patch - - update - - watch diff --git a/controllers/raycluster_controller.go b/controllers/raycluster_controller.go new file mode 100644 index 00000000..00d7d559 --- /dev/null +++ b/controllers/raycluster_controller.go @@ -0,0 +1,338 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + "crypto/rand" + "crypto/sha1" + "encoding/base64" + "strconv" + + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + coreapply "k8s.io/client-go/applyconfigurations/core/v1" + v1 "k8s.io/client-go/applyconfigurations/meta/v1" + rbacapply "k8s.io/client-go/applyconfigurations/rbac/v1" + "k8s.io/client-go/kubernetes" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + routev1 "github.com/openshift/api/route/v1" + routeapply "github.com/openshift/client-go/route/applyconfigurations/route/v1" + routev1client "github.com/openshift/client-go/route/clientset/versioned/typed/route/v1" +) + +// RayClusterReconciler reconciles a RayCluster object +type RayClusterReconciler struct { + client.Client + kubeClient *kubernetes.Clientset + routeClient *routev1client.RouteV1Client + Scheme *runtime.Scheme + CookieSalt string +} + +const ( + requeueTime = 10 + controllerName = "codeflare-raycluster-controller" + oauthAnnotation = "codeflare.dev/oauth=true" + CodeflareOAuthFinalizer = "codeflare.dev/oauth-finalizer" + OAuthServicePort = 443 + OAuthServicePortName = "oauth-proxy" + strTrue = "true" + strFalse = "false" + logRequeueing = "requeueing" +) + +var ( + deletePolicy = metav1.DeletePropagationForeground + deleteOptions = client.DeleteOptions{PropagationPolicy: &deletePolicy} +) + +//+kubebuilder:rbac:groups=ray.io,resources=rayclusters,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=ray.io,resources=rayclusters/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=ray.io,resources=rayclusters/finalizers,verbs=update +//+kubebuilder:rbac:groups=route.openshift.io,resources=routes,verbs=patch;delete;get +//+kubebuilder:rbac:groups=core,resources=secrets,verbs=get;create;patch;delete;get +//+kubebuilder:rbac:groups=core,resources=services,verbs=patch;delete;get +//+kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=patch;delete;get + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// TODO(user): Modify the Reconcile function to compare the state specified by +// the RayCluster object against the actual cluster state, and then +// perform operations to make the cluster state reflect the state specified by +// the user. +// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.15.3/pkg/reconcile + +func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + logger := ctrl.LoggerFrom(ctx) + + var cluster rayv1.RayCluster + + if err := r.Get(ctx, req.NamespacedName, &cluster); err != nil { + if !errors.IsNotFound(err) { + logger.Error(err, "Error getting RayCluster resource") + } + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + if cluster.ObjectMeta.DeletionTimestamp.IsZero() { + if !controllerutil.ContainsFinalizer(&cluster, CodeflareOAuthFinalizer) { + logger.Info("Add a finalizer", "finalizer", CodeflareOAuthFinalizer) + controllerutil.AddFinalizer(&cluster, CodeflareOAuthFinalizer) + if err := r.Update(ctx, &cluster); err != nil { + // this log is info level since errors are not fatal and are expected + logger.Info("WARN: Failed to update RayCluster with finalizer", "error", err.Error(), logRequeueing, strTrue) + return ctrl.Result{RequeueAfter: requeueTime}, err + } + } + } else if controllerutil.ContainsFinalizer(&cluster, CodeflareOAuthFinalizer) { + err := r.deleteIfNotExist( + ctx, types.NamespacedName{Namespace: cluster.Namespace, Name: crbNameFromCluster(&cluster)}, &rbacv1.ClusterRoleBinding{}, + ) + if err != nil { + logger.Error(err, "Failed to remove OAuth ClusterRoleBinding.", logRequeueing, strTrue) + return ctrl.Result{RequeueAfter: requeueTime}, err + } + controllerutil.RemoveFinalizer(&cluster, CodeflareOAuthFinalizer) + if err := r.Update(ctx, &cluster); err != nil { + logger.Error(err, "Failed to remove finalizer from RayCluster", logRequeueing, strTrue) + return ctrl.Result{RequeueAfter: requeueTime}, err + } + logger.Info("Successfully removed finalizer.", logRequeueing, strFalse) + return ctrl.Result{}, nil + } + + val, ok := cluster.ObjectMeta.Annotations["codeflare.dev/oauth"] + boolVal, err := strconv.ParseBool(val) + if err != nil { + logger.Error(err, "Could not convert codeflare.dev/oauth value to bool", "codeflare.dev/oauth", val) + } + if !ok || err != nil || !boolVal { + logger.Info("Removing all OAuth Objects") + err := r.deleteIfNotExist( + ctx, types.NamespacedName{Namespace: cluster.Namespace, Name: oauthSecretNameFromCluster(&cluster)}, &corev1.Secret{}, + ) + if err != nil { + logger.Error(err, "Error deleting OAuth Secret, retrying", logRequeueing, strTrue) + return ctrl.Result{RequeueAfter: requeueTime}, nil + } + err = r.deleteIfNotExist( + ctx, types.NamespacedName{Namespace: cluster.Namespace, Name: oauthServiceNameFromCluster(&cluster)}, &corev1.Service{}, + ) + if err != nil { + logger.Error(err, "Error deleting OAuth Service, retrying", logRequeueing, strTrue) + return ctrl.Result{RequeueAfter: requeueTime}, nil + } + err = r.deleteIfNotExist( + ctx, types.NamespacedName{Namespace: cluster.Namespace, Name: oauthServiceAccountNameFromCluster(&cluster)}, &corev1.ServiceAccount{}, + ) + if err != nil { + logger.Error(err, "Error deleting OAuth ServiceAccount, retrying", logRequeueing, strTrue) + return ctrl.Result{RequeueAfter: requeueTime}, nil + } + err = r.deleteIfNotExist( + ctx, types.NamespacedName{Namespace: cluster.Namespace, Name: crbNameFromCluster(&cluster)}, &rbacv1.ClusterRoleBinding{}, + ) + if err != nil { + logger.Error(err, "Error deleting OAuth CRB, retrying", logRequeueing, strTrue) + return ctrl.Result{RequeueAfter: requeueTime}, nil + } + err = r.deleteIfNotExist( + ctx, types.NamespacedName{Namespace: cluster.Namespace, Name: routeNameFromCluster(&cluster)}, &routev1.Route{}, + ) + if err != nil { + logger.Error(err, "Error deleting OAuth Route, retrying", logRequeueing, strTrue) + return ctrl.Result{RequeueAfter: requeueTime}, nil + } + return ctrl.Result{}, nil + } + + _, err = r.routeClient.Routes(cluster.Namespace).Apply(ctx, desiredClusterRoute(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + if err != nil { + logger.Error(err, "Failed to update OAuth Route") + } + + _, err = r.kubeClient.CoreV1().Secrets(cluster.Namespace).Apply(ctx, desiredOAuthSecret(&cluster, r), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + if err != nil { + logger.Error(err, "Failed to create OAuth Secret") + } + + _, err = r.kubeClient.CoreV1().Services(cluster.Namespace).Apply(ctx, desiredOAuthService(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + if err != nil { + logger.Error(err, "Failed to update OAuth Service") + } + + _, err = r.kubeClient.CoreV1().ServiceAccounts(cluster.Namespace).Apply(ctx, desiredServiceAccount(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + if err != nil { + logger.Error(err, "Failed to update OAuth ServiceAccount") + } + + _, err = r.kubeClient.RbacV1().ClusterRoleBindings().Apply(ctx, desiredOAuthClusterRoleBinding(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + if err != nil { + logger.Error(err, "Failed to update OAuth ClusterRoleBinding") + } + + return ctrl.Result{}, nil +} + +func crbNameFromCluster(cluster *rayv1.RayCluster) string { + return cluster.Name + "-" + cluster.Namespace + "-auth" // NOTE: potential naming conflicts ie {name: foo, ns: bar-baz} and {name: foo-bar, ns: baz} +} + +func (r *RayClusterReconciler) deleteIfNotExist(ctx context.Context, namespacedName types.NamespacedName, obj client.Object) error { + err := r.Client.Get(ctx, namespacedName, obj) + if err != nil && errors.IsNotFound(err) { + return nil + } else if err != nil { + return err + } + return r.Client.Delete(ctx, obj, &deleteOptions) +} + +func desiredOAuthClusterRoleBinding(cluster *rayv1.RayCluster) *rbacapply.ClusterRoleBindingApplyConfiguration { + return rbacapply.ClusterRoleBinding( + crbNameFromCluster(cluster)). + WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}). + WithSubjects( + rbacapply.Subject(). + WithKind("ServiceAccount"). + WithName(oauthServiceAccountNameFromCluster(cluster)). + WithNamespace(cluster.Namespace), + ). + WithRoleRef( + rbacapply.RoleRef(). + WithAPIGroup("rbac.authorization.k8s.io"). + WithKind("ClusterRole"). + WithName("system:auth-delegator"), + ). + WithOwnerReferences( + v1.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion), + ) +} + +func oauthServiceAccountNameFromCluster(cluster *rayv1.RayCluster) string { + return cluster.Name + "-oauth-proxy" +} + +func desiredServiceAccount(cluster *rayv1.RayCluster) *coreapply.ServiceAccountApplyConfiguration { + return coreapply.ServiceAccount(oauthServiceAccountNameFromCluster(cluster), cluster.Namespace). + WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}). + WithAnnotations(map[string]string{ + "serviceaccounts.openshift.io/oauth-redirectreference.first": "" + + `{"kind":"OAuthRedirectReference","apiVersion":"v1",` + + `"reference":{"kind":"Route","name":"` + routeNameFromCluster(cluster) + `"}}`, + }). + WithOwnerReferences( + v1.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion), + ) +} + +func routeNameFromCluster(cluster *rayv1.RayCluster) string { + return "ray-dashboard-" + cluster.Name +} + +func desiredClusterRoute(cluster *rayv1.RayCluster) *routeapply.RouteApplyConfiguration { + return routeapply.Route(routeNameFromCluster(cluster), cluster.Namespace). + WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}). + WithSpec(routeapply.RouteSpec(). + WithTo(routeapply.RouteTargetReference().WithKind("Service").WithName(oauthServiceNameFromCluster(cluster))). + WithPort(routeapply.RoutePort().WithTargetPort(intstr.FromString((OAuthServicePortName)))). + WithTLS(routeapply.TLSConfig(). + WithInsecureEdgeTerminationPolicy(routev1.InsecureEdgeTerminationPolicyRedirect). + WithTermination(routev1.TLSTerminationReencrypt), + ), + ). + WithOwnerReferences( + v1.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion), + ) +} + +func oauthServiceNameFromCluster(cluster *rayv1.RayCluster) string { + return cluster.Name + "-oauth" +} + +func oauthServiceTLSSecretName(cluster *rayv1.RayCluster) string { + return cluster.Name + "-proxy-tls-secret" +} + +func desiredOAuthService(cluster *rayv1.RayCluster) *coreapply.ServiceApplyConfiguration { + return coreapply.Service(oauthServiceNameFromCluster(cluster), cluster.Namespace). + WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}). + WithAnnotations(map[string]string{"service.beta.openshift.io/serving-cert-secret-name": oauthServiceTLSSecretName(cluster)}). + WithSpec( + coreapply.ServiceSpec(). + WithPorts( + coreapply.ServicePort(). + WithName(OAuthServicePortName). + WithPort(OAuthServicePort). + WithTargetPort(intstr.FromString(OAuthServicePortName)). + WithProtocol(corev1.ProtocolTCP), + ). + WithSelector(map[string]string{"ray.io/cluster": cluster.Name, "ray.io/node-type": "head"}), + ). + WithOwnerReferences( + v1.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion), + ) +} + +func oauthSecretNameFromCluster(cluster *rayv1.RayCluster) string { + return cluster.Name + "-oauth-config" +} + +// desiredOAuthSecret defines the desired OAuth secret object +func desiredOAuthSecret(cluster *rayv1.RayCluster, r *RayClusterReconciler) *coreapply.SecretApplyConfiguration { + // Generate the cookie secret for the OAuth proxy + hasher := sha1.New() // REVIEW is SHA1 okay here? + hasher.Write([]byte(cluster.Name + r.CookieSalt)) + cookieSecret := base64.StdEncoding.EncodeToString(hasher.Sum(nil)) + + return coreapply.Secret(oauthSecretNameFromCluster(cluster), cluster.Namespace). + WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}). + WithStringData(map[string]string{"cookie_secret": cookieSecret}). + WithOwnerReferences( + v1.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion), + ) + // Create a Kubernetes secret to store the cookie secret +} + +// SetupWithManager sets up the controller with the Manager. +func (r *RayClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { + r.kubeClient = kubernetes.NewForConfigOrDie(mgr.GetConfig()) + r.routeClient = routev1client.NewForConfigOrDie(mgr.GetConfig()) + b := make([]byte, 16) + _, err := rand.Read(b) + if err != nil { + return err + } + r.CookieSalt = string(b) + return ctrl.NewControllerManagedBy(mgr). + Named(controllerName). + For(&rayv1.RayCluster{}). + Complete(r) +} diff --git a/controllers/raycluster_controller_test.go b/controllers/raycluster_controller_test.go new file mode 100644 index 00000000..0facb0cb --- /dev/null +++ b/controllers/raycluster_controller_test.go @@ -0,0 +1,222 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + "math/rand" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + + routev1 "github.com/openshift/api/route/v1" +) + +func stringInList(l []string, s string) bool { + for _, i := range l { + if i == s { + return true + } + } + return false +} + +var letters = []rune("abcdefghijklmnopqrstuvwxyz") +var r = rand.New(rand.NewSource(time.Now().UnixNano())) + +func randSeq(n int) string { + b := make([]rune, n) + for i := range b { + b[i] = letters[r.Intn(len(letters))] + } + return string(b) +} + +var _ = Describe("RayCluster controller", func() { + Context("RayCluster controller test", func() { + var rayClusterName = "test-raycluster" + var typeNamespaceName types.NamespacedName + ctx := context.Background() + BeforeEach(func() { + By("Generate random number so each run is creating unique") + rString := randSeq(10) + rayClusterName = rayClusterName + "-" + rString + typeNamespaceName = types.NamespacedName{Name: rayClusterName, Namespace: rayClusterName} + By("Creating a namespace for running the tests.") + namespace := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: rayClusterName, + }, + } + var err error + err = k8sClient.Create(ctx, namespace) + Expect(err).To(Not(HaveOccurred())) + + By("creating a basic instance of the RayCluster CR") + raycluster := &rayv1.RayCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: rayClusterName, + Namespace: rayClusterName, + }, + Spec: rayv1.RayClusterSpec{ + HeadGroupSpec: rayv1.HeadGroupSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + corev1.Container{}, + }, + }, + }, + RayStartParams: map[string]string{}, + }, + }, + } + err = k8sClient.Get(ctx, typeNamespaceName, &rayv1.RayCluster{}) + Expect(errors.IsNotFound(err)).To(Equal(true)) + err = k8sClient.Create(ctx, raycluster) + Expect(err).To(Not(HaveOccurred())) + }) + + AfterEach(func() { + By("removing the instance of the RayCluster used") + // err := clientSet.CoreV1().Namespaces().Delete(ctx, RayClusterName, metav1.DeleteOptions{}) + foundRayCluster := rayv1.RayCluster{} + err := k8sClient.Get(ctx, typeNamespaceName, &foundRayCluster) + if err != nil { + Expect(errors.IsNotFound(err)).To(Equal(true)) + } else { + Expect(err).To(Not(HaveOccurred())) + _ = k8sClient.Delete(ctx, &foundRayCluster) + } + Eventually(func() bool { + err := k8sClient.Get(ctx, typeNamespaceName, &foundRayCluster) + return errors.IsNotFound(err) + }, SpecTimeout(time.Second*10)).Should(Equal(true)) + }) + + It("should have oauth finalizer set", func() { + foundRayCluster := rayv1.RayCluster{} + Eventually(func() bool { + err := k8sClient.Get(ctx, typeNamespaceName, &foundRayCluster) + Expect(err).To(Not(HaveOccurred())) + return stringInList(foundRayCluster.Finalizers, CodeflareOAuthFinalizer) + }, SpecTimeout(time.Second*10)).Should(Equal(true)) + }) + + Context("Cluster has OAuth annotation", func() { + BeforeEach(func() { + By("adding OAuth annotation to RayCluster") + Eventually(func() error { + foundRayCluster := rayv1.RayCluster{} + err := k8sClient.Get(ctx, typeNamespaceName, &foundRayCluster) + Expect(err).To(Not(HaveOccurred())) + if foundRayCluster.Annotations == nil { + foundRayCluster.Annotations = map[string]string{"codeflare.dev/oauth": "true"} + } else { + foundRayCluster.Annotations["codeflare.dev/oauth"] = "'true'" + } + return k8sClient.Update(ctx, &foundRayCluster) + }, SpecTimeout(time.Second*10)).Should(Not(HaveOccurred())) + By("waiting for dependent resources to be created") + Eventually(func() error { + foundRayCluster := rayv1.RayCluster{} + err := k8sClient.Get(ctx, typeNamespaceName, &foundRayCluster) + if err != nil { + return err + } + err = k8sClient.Get(ctx, types.NamespacedName{Name: oauthSecretNameFromCluster(&foundRayCluster), Namespace: foundRayCluster.Namespace}, &corev1.Secret{}) + if err != nil { + return err + } + err = k8sClient.Get(ctx, types.NamespacedName{Name: oauthServiceNameFromCluster(&foundRayCluster), Namespace: foundRayCluster.Namespace}, &corev1.Service{}) + if err != nil { + return err + } + err = k8sClient.Get(ctx, types.NamespacedName{Name: foundRayCluster.Name, Namespace: foundRayCluster.Namespace}, &corev1.ServiceAccount{}) + if err != nil { + return err + } + err = k8sClient.Get(ctx, types.NamespacedName{Name: crbNameFromCluster(&foundRayCluster)}, &rbacv1.ClusterRoleBinding{}) + if err != nil { + return err + } + err = k8sClient.Get(ctx, types.NamespacedName{Name: foundRayCluster.Name, Namespace: foundRayCluster.Namespace}, &routev1.Route{}) + if err != nil { + return err + } + return nil + }, SpecTimeout(time.Second*10)).Should(Not(HaveOccurred())) + }) + + It("should set owner references for all resources", func() { + foundRayCluster := rayv1.RayCluster{} + err := k8sClient.Get(ctx, typeNamespaceName, &foundRayCluster) + Expect(err).ToNot(HaveOccurred()) + err = k8sClient.Get(ctx, types.NamespacedName{Name: oauthSecretNameFromCluster(&foundRayCluster), Namespace: foundRayCluster.Namespace}, &corev1.Secret{}) + Expect(err).To(Not(HaveOccurred())) + err = k8sClient.Get(ctx, types.NamespacedName{Name: oauthServiceNameFromCluster(&foundRayCluster), Namespace: foundRayCluster.Namespace}, &corev1.Service{}) + Expect(err).To(Not(HaveOccurred())) + err = k8sClient.Get(ctx, types.NamespacedName{Name: foundRayCluster.Name, Namespace: foundRayCluster.Namespace}, &corev1.ServiceAccount{}) + Expect(err).To(Not(HaveOccurred())) + err = k8sClient.Get(ctx, types.NamespacedName{Name: crbNameFromCluster(&foundRayCluster)}, &rbacv1.ClusterRoleBinding{}) + Expect(err).To(Not(HaveOccurred())) + err = k8sClient.Get(ctx, types.NamespacedName{Name: foundRayCluster.Name, Namespace: foundRayCluster.Namespace}, &routev1.Route{}) + Expect(err).To(Not(HaveOccurred())) + }) + + It("should delete OAuth resources when annotation is removed", func() { + foundRayCluster := rayv1.RayCluster{} + err := k8sClient.Get(ctx, typeNamespaceName, &foundRayCluster) + Expect(err).To(Not(HaveOccurred())) + delete(foundRayCluster.Annotations, "codeflare.dev/oauth") + err = k8sClient.Update(ctx, &foundRayCluster) + Expect(err).To(Not(HaveOccurred())) + Eventually(func() bool { + return errors.IsNotFound(k8sClient.Get(ctx, types.NamespacedName{Name: oauthSecretNameFromCluster(&foundRayCluster), Namespace: foundRayCluster.Namespace}, &corev1.Secret{})) + }, SpecTimeout(time.Second*10)).Should(Equal(true)) + Eventually(func() bool { + return errors.IsNotFound(k8sClient.Get(ctx, types.NamespacedName{Name: oauthServiceNameFromCluster(&foundRayCluster), Namespace: foundRayCluster.Namespace}, &corev1.Service{})) + }, SpecTimeout(time.Second*10)).Should(Equal(true)) + Eventually(func() bool { + return errors.IsNotFound(k8sClient.Get(ctx, types.NamespacedName{Name: foundRayCluster.Name, Namespace: foundRayCluster.Namespace}, &corev1.ServiceAccount{})) + }, SpecTimeout(time.Second*10)).Should(Equal(true)) + Eventually(func() bool { + return errors.IsNotFound(k8sClient.Get(ctx, types.NamespacedName{Name: crbNameFromCluster(&foundRayCluster)}, &rbacv1.ClusterRoleBinding{})) + }, SpecTimeout(time.Second*10)).Should(Equal(true)) + }) + + It("should remove CRB when the RayCluster is deleted", func() { + foundRayCluster := rayv1.RayCluster{} + err := k8sClient.Get(ctx, typeNamespaceName, &foundRayCluster) + Expect(err).To(Not(HaveOccurred())) + err = k8sClient.Delete(ctx, &foundRayCluster) + Expect(err).To(Not(HaveOccurred())) + Eventually(func() bool { + return errors.IsNotFound(k8sClient.Get(ctx, types.NamespacedName{Name: crbNameFromCluster(&foundRayCluster)}, &rbacv1.ClusterRoleBinding{})) + }, SpecTimeout(time.Second*10)).Should(Equal(true)) + }) + }) + }) +}) diff --git a/controllers/suite_test.go b/controllers/suite_test.go new file mode 100644 index 00000000..0110179f --- /dev/null +++ b/controllers/suite_test.go @@ -0,0 +1,135 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + "io" + "net/http" + "os" + "path/filepath" + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" + + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + routev1 "github.com/openshift/api/route/v1" + //+kubebuilder:scaffold:imports +) + +// These tests use Ginkgo (BDD-style Go testing framework). Refer to +// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. + +var cfg *rest.Config +var k8sClient client.Client +var testEnv *envtest.Environment + +func TestAPIs(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecs(t, "Controller Suite") +} + +const ( + RayClusterCRDFileDownload = "https://raw.githubusercontent.com/ray-project/kuberay/master/ray-operator/config/crd/bases/ray.io_rayclusters.yaml" + RouteCRDFileDownload = "https://raw.githubusercontent.com/openshift/api/master/route/v1/route.crd.yaml" +) + +var _ = BeforeSuite(func() { + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + + var err error + var fRoute, fRaycluster *os.File + + By("Creating and downloading necessary crds") + err = os.Mkdir("./test-crds", os.ModePerm) + Expect(err).ToNot(HaveOccurred()) + fRoute, err = os.Create("./test-crds/route.yaml") + Expect(err).ToNot(HaveOccurred()) + defer fRoute.Close() + resp, err := http.Get(RouteCRDFileDownload) + Expect(err).ToNot(HaveOccurred()) + _, err = io.Copy(fRoute, resp.Body) + Expect(err).ToNot(HaveOccurred()) + fRaycluster, err = os.Create("./test-crds/raycluster.yaml") + Expect(err).ToNot(HaveOccurred()) + defer fRaycluster.Close() + resp, err = http.Get(RayClusterCRDFileDownload) + Expect(err).ToNot(HaveOccurred()) + _, err = io.Copy(fRaycluster, resp.Body) + Expect(err).ToNot(HaveOccurred()) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{ + filepath.Join("..", "config", "crd"), + filepath.Join(".", "test-crds"), + }, + ErrorIfCRDPathMissing: true, + } + + // cfg is defined in this file globally. + cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + //+kubebuilder:scaffold:scheme + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + clientSet, err := kubernetes.NewForConfig(cfg) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + err = rayv1.AddToScheme(scheme.Scheme) + Expect(err).To(Not(HaveOccurred())) + err = routev1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + k8sManager, err := ctrl.NewManager(cfg, ctrl.Options{ + Scheme: scheme.Scheme, + }) + Expect(err).NotTo(HaveOccurred()) + err = (&RayClusterReconciler{ + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + kubeClient: clientSet, + CookieSalt: "foo", + }).SetupWithManager(k8sManager) + Expect(err).NotTo(HaveOccurred()) + go func() { + defer GinkgoRecover() + err = k8sManager.Start(context.Background()) + Expect(err).ToNot(HaveOccurred(), "failed to run manager") + }() +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + err := os.RemoveAll("./test-crds") + Expect(err).NotTo(HaveOccurred()) + err = testEnv.Stop() + Expect(err).NotTo(HaveOccurred()) +}) diff --git a/go.mod b/go.mod index 7f846b2b..93fa2c9f 100644 --- a/go.mod +++ b/go.mod @@ -3,9 +3,11 @@ module github.com/project-codeflare/codeflare-operator go 1.20 require ( + github.com/onsi/ginkgo/v2 v2.11.0 github.com/onsi/gomega v1.27.10 github.com/openshift/api v0.0.0-20230213134911-7ba313770556 - github.com/project-codeflare/codeflare-common v0.0.0-20240201153809-2e7292120303 + github.com/openshift/client-go v0.0.0-20221019143426-16aed247da5c + github.com/project-codeflare/codeflare-common v0.0.0-20240207083912-d7a229270a0a github.com/project-codeflare/instascale v0.4.0 github.com/project-codeflare/multi-cluster-app-dispatcher v1.40.0 github.com/ray-project/kuberay/ray-operator v1.0.0 @@ -46,6 +48,7 @@ require ( github.com/go-openapi/jsonpointer v0.19.6 // indirect github.com/go-openapi/jsonreference v0.20.1 // indirect github.com/go-openapi/swag v0.22.3 // indirect + github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang-jwt/jwt/v4 v4.4.3 // indirect github.com/golang/glog v1.1.2 // indirect @@ -55,6 +58,7 @@ require ( github.com/google/gnostic v0.6.9 // indirect github.com/google/go-cmp v0.5.9 // indirect github.com/google/gofuzz v1.2.0 // indirect + github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect github.com/google/uuid v1.3.0 // indirect github.com/gorilla/css v1.0.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect @@ -72,7 +76,6 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/openshift-online/ocm-sdk-go v0.1.368 // indirect - github.com/openshift/client-go v0.0.0-20221019143426-16aed247da5c // indirect github.com/pkg/errors v0.9.1 // indirect github.com/prometheus/client_golang v1.18.0 // indirect github.com/prometheus/client_model v0.5.0 // indirect @@ -102,6 +105,7 @@ require ( golang.org/x/term v0.16.0 // indirect golang.org/x/text v0.14.0 // indirect golang.org/x/time v0.3.0 // indirect + golang.org/x/tools v0.12.0 // indirect gomodules.xyz/jsonpatch/v2 v2.3.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto v0.0.0-20230711160842-782d3b101e98 // indirect diff --git a/go.sum b/go.sum index 60345cd5..7d5f4c95 100644 --- a/go.sum +++ b/go.sum @@ -142,6 +142,7 @@ github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+ github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= +github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gofrs/uuid v4.0.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= @@ -216,6 +217,7 @@ github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec= +github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= @@ -366,6 +368,7 @@ github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo/v2 v2.1.3/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c= github.com/onsi/ginkgo/v2 v2.1.4/go.mod h1:um6tUpWM/cxCK3/FK8BXqEiUMUwRgSM4JXG47RKZmLU= github.com/onsi/ginkgo/v2 v2.11.0 h1:WgqUCUt/lT6yXoQ8Wef0fsNn5cAuMK7+KT9UFRz2tcU= +github.com/onsi/ginkgo/v2 v2.11.0/go.mod h1:ZhrRA5XmEE3x3rhlzamx/JJvujdZoJ2uvgI7kR0iZvM= github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= github.com/onsi/gomega v1.17.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= @@ -384,8 +387,8 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/project-codeflare/codeflare-common v0.0.0-20240201153809-2e7292120303 h1:30LG8751WElZmWA3mVS8l23l2oZnUCqbDkLCyy0U/p0= -github.com/project-codeflare/codeflare-common v0.0.0-20240201153809-2e7292120303/go.mod h1:2Ck9LC+6Xi4jTDSlCJoP00tCzSrxek0roLsjvUgL2gY= +github.com/project-codeflare/codeflare-common v0.0.0-20240207083912-d7a229270a0a h1:Yk9J5qXjp+yfSRCzS0EElrhpTgfYJ+S+W/z84cmlmX4= +github.com/project-codeflare/codeflare-common v0.0.0-20240207083912-d7a229270a0a/go.mod h1:2Ck9LC+6Xi4jTDSlCJoP00tCzSrxek0roLsjvUgL2gY= github.com/project-codeflare/instascale v0.4.0 h1:l/cb+x4FrJ2bN9wXjv1mCngy77tVw0CLMiqJovTAflo= github.com/project-codeflare/instascale v0.4.0/go.mod h1:CpduFXKeuqYW4Ph1CPOJV6dpAdpebOxhbU4CmccZWSo= github.com/project-codeflare/multi-cluster-app-dispatcher v1.40.0 h1:IkTmd/W/zxcsC5s4EbnW74PFpkQVEiTc/8rWWwFw0Ok= @@ -450,6 +453,7 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= @@ -569,6 +573,7 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= +golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -764,6 +769,7 @@ golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.10/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E= golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss= +golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM= golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190513163551-3ee3066db522/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/main.go b/main.go index 5e2bb6e3..58aa0289 100644 --- a/main.go +++ b/main.go @@ -30,13 +30,16 @@ import ( quotasubtreev1alpha1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/apis/quotaplugins/quotasubtree/v1alpha1" mcadconfig "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/config" mcad "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/queuejob" + rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" "go.uber.org/zap/zapcore" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/client-go/discovery" "k8s.io/client-go/kubernetes" clientgoscheme "k8s.io/client-go/kubernetes/scheme" _ "k8s.io/client-go/plugin/pkg/client/auth" @@ -51,7 +54,9 @@ import ( configv1 "github.com/openshift/api/config/v1" machinev1beta1 "github.com/openshift/api/machine/v1beta1" + routev1 "github.com/openshift/api/route/v1" + cfoControllers "github.com/project-codeflare/codeflare-operator/controllers" "github.com/project-codeflare/codeflare-operator/pkg/config" // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) // to ensure that exec-entrypoint and run can make use of them. @@ -75,6 +80,10 @@ func init() { // InstaScale utilruntime.Must(configv1.Install(scheme)) utilruntime.Must(machinev1beta1.Install(scheme)) + // Ray + utilruntime.Must(rayv1.AddToScheme(scheme)) + // OpenShift Route + utilruntime.Must(routev1.Install(scheme)) } func main() { @@ -171,6 +180,13 @@ func main() { exitOnError(instaScaleController.SetupWithManager(context.Background(), mgr), "Error setting up InstaScale controller") } + if v, err := HasAPIResourceForGVK(kubeClient.DiscoveryClient, rayv1.GroupVersion.WithKind("RayCluster")); v { + rayClusterController := cfoControllers.RayClusterReconciler{Client: mgr.GetClient(), Scheme: mgr.GetScheme()} + exitOnError(rayClusterController.SetupWithManager(mgr), "Error setting up RayCluster controller") + } else if err != nil { + exitOnError(err, "Could not determine if RayCluster CR present on cluster.") + } + exitOnError(mgr.AddHealthzCheck(cfg.Health.LivenessEndpointName, healthz.Ping), "unable to set up health check") exitOnError(mgr.AddReadyzCheck(cfg.Health.ReadinessEndpointName, healthz.Ping), "unable to set up ready check") @@ -221,6 +237,23 @@ func createConfigMap(ctx context.Context, client kubernetes.Interface, ns, name return err } +func HasAPIResourceForGVK(dc discovery.DiscoveryInterface, gvk schema.GroupVersionKind) (bool, error) { + gv, kind := gvk.ToAPIVersionAndKind() + if resources, err := dc.ServerResourcesForGroupVersion(gv); err != nil { + if apierrors.IsNotFound(err) { + return false, nil + } + return false, err + } else { + for _, res := range resources.APIResources { + if res.Kind == kind { + return true, nil + } + } + } + return false, nil +} + func namespaceOrDie() string { // This way assumes you've set the NAMESPACE environment variable either manually, when running // the operator standalone, or using the downward API, when running the operator in-cluster.