diff --git a/docs/source/operators/deploy-kubernetes.md b/docs/source/operators/deploy-kubernetes.md index 931a1531..97ce1e00 100644 --- a/docs/source/operators/deploy-kubernetes.md +++ b/docs/source/operators/deploy-kubernetes.md @@ -522,7 +522,11 @@ can override them with helm's `--set` or `--values` options. Always use `--set` | `kip.defaultContainerRegistry` | Prefix to use if a registry is not already specified on image name (e.g., elyra/kernel-py:VERSION) | `docker.io` | | `kip.fetcher` | fetcher to fetch image names, defaults to KernelSpecsFetcher | `KernelSpecsFetcher` | | `kip.images` | if StaticListFetcher is used KIP_IMAGES defines the list of images pullers will fetch | `[]` | -| `kip.internalFetcher ` | if CombinedImagesFetcher is used KIP_INTERNAL_FETCHERS defines the fetchers that get used internally | `KernelSpecsFetcher` | +| `kip.internalFetcher ` | if CombinedImagesFetcher is used KIP_INTERNAL_FETCHERS defines the fetchers that get used internally | `KernelSpecsFetcher` | +| `externalCluster.enabled` | Launch kernels in a remote cluster. Used for multi-cluster environments. **Must place a kubeconfig file in the `config/` folder of the helm chart**. | `false` | +| `externalCluster.configPath` | Path to mount kubeconfig at | `/etc/kube/config/` | +| `externalCluster.configFilename` | Filename of kubeconfig file inside `config/` directory of chart | `kubeconfig` | +| `externalCluster.autoConfigureRemote` | Automatically create service account in remote cluster | `false` | ## Uninstalling Enterprise Gateway @@ -958,6 +962,29 @@ Of particular importance is the mapping to port `8888` (e.g.,`32422`). If you ar The value of the `JUPYTER_GATEWAY_URL` used by the gateway-enabled Notebook server will vary depending on whether you choose to define an external IP or not. 
If an external IP is defined, you'll set `JUPYTER_GATEWAY_URL=:8888` else you'll set `JUPYTER_GATEWAY_URL=:32422` **but also need to restart clients each time Enterprise Gateway is started.** As a result, use of the `externalIPs:` value is highly recommended. +## Multi-Cluster Environments + +### Overview + +With `externalCluster.enabled` set to `true`, Enterprise Gateway can be used on multi-cluster environments where the jupyter enterprise gateway pods and kernel pods are launched on separate clusters. To configure this: + +1. Ensure your two clusters have interconnected networks. Pods in the two clusters must be able to communicate with each other over pod IP alone. +1. Provide a kubeconfig file for use in the `config/` subdirectory of `etc/kubernetes/helm/enterprise-gateway` chart. +1. Set `externalCluster.enabled` to `true`. + +Enterprise Gateway will now launch kernel pods in whichever cluster you have set to default in your kubeconfig. + +### Resources in Remote Clusters + +For Enterprise Gateway to work across clusters, Enterprise Gateway must create the following resources in the cluster your kernels will be launched on. + +- The kernel resource. +- A service account for the kernel pods (if `externalCluster.autoConfigureRemote` is set to `true`). +- A namespaced role for the namespace where your kernel pods will be launched. +- A role binding between your namespaced role and your service account. + +The role resource is defined in the `templates/kernel-role.yaml` template of the helm chart. Permissions can be set there. + ## Kubernetes Tips The following items illustrate some useful commands for navigating Enterprise Gateway within a kubernetes environment. 
diff --git a/enterprise_gateway/services/processproxies/crd.py b/enterprise_gateway/services/processproxies/crd.py index 7e47a5dc..ba3fc2eb 100644 --- a/enterprise_gateway/services/processproxies/crd.py +++ b/enterprise_gateway/services/processproxies/crd.py @@ -10,6 +10,8 @@ from kubernetes import client +from enterprise_gateway.services.processproxies.k8s_client import kubernetes_client + from ..kernels.remotemanager import RemoteKernelManager from .k8s import KubernetesProcessProxy @@ -105,7 +107,9 @@ def delete_managed_object(self, termination_stati: list[str]) -> bool: Note: the caller is responsible for handling exceptions. """ - delete_status = client.CustomObjectsApi().delete_namespaced_custom_object( + delete_status = client.CustomObjectsApi( + api_client=kubernetes_client + ).delete_namespaced_custom_object( self.group, self.version, self.kernel_namespace, diff --git a/enterprise_gateway/services/processproxies/k8s.py b/enterprise_gateway/services/processproxies/k8s.py index d128d562..9892a153 100644 --- a/enterprise_gateway/services/processproxies/k8s.py +++ b/enterprise_gateway/services/processproxies/k8s.py @@ -10,10 +10,15 @@ from typing import Any import urllib3 -from kubernetes import client, config +import yaml +from kubernetes import client +from kubernetes.utils.create_from_yaml import create_from_yaml_single_item + +from enterprise_gateway.services.processproxies.k8s_client import kubernetes_client from ..kernels.remotemanager import RemoteKernelManager from ..sessions.kernelsessionmanager import KernelSessionManager +from ..utils.envutils import is_env_true from .container import ContainerProcessProxy urllib3.disable_warnings() @@ -29,8 +34,6 @@ share_gateway_namespace = bool(os.environ.get("EG_SHARED_NAMESPACE", "False").lower() == "true") kpt_dir = os.environ.get("EG_POD_TEMPLATE_DIR", "/tmp") # noqa -config.load_incluster_config() - class KubernetesProcessProxy(ContainerProcessProxy): """ @@ -56,6 +59,12 @@ async def launch_process( ) -> 
KubernetesProcessProxy: """Launches the specified process within a Kubernetes environment.""" # Set env before superclass call, so we can see these in the debug output + use_remote_cluster = os.getenv("EG_USE_REMOTE_CLUSTER") + if use_remote_cluster: + kwargs["env"]["EG_USE_REMOTE_CLUSTER"] = 'true' + kwargs["env"]["EG_REMOTE_CLUSTER_KUBECONFIG_PATH"] = os.getenv( + "EG_REMOTE_CLUSTER_KUBECONFIG_PATH" + ) # Kubernetes relies on internal env variables to determine its configuration. When # running within a K8s cluster, these start with KUBERNETES_SERVICE, otherwise look @@ -85,7 +94,7 @@ def get_container_status(self, iteration: int | None) -> str: # is used for the assigned_ip. pod_status = "" kernel_label_selector = "kernel_id=" + self.kernel_id + ",component=kernel" - ret = client.CoreV1Api().list_namespaced_pod( + ret = client.CoreV1Api(api_client=kubernetes_client).list_namespaced_pod( namespace=self.kernel_namespace, label_selector=kernel_label_selector ) if ret and ret.items: @@ -121,7 +130,7 @@ def delete_managed_object(self, termination_stati: list[str]) -> bool: # Deleting a Pod will return a v1.Pod if found and its status will be a PodStatus containing # a phase string property # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.21/#podstatus-v1-core - v1_pod = client.CoreV1Api().delete_namespaced_pod( + v1_pod = client.CoreV1Api(api_client=kubernetes_client).delete_namespaced_pod( namespace=self.kernel_namespace, body=body, name=self.container_name ) status = None @@ -168,7 +177,7 @@ def terminate_container_resources(self) -> bool | None: body = client.V1DeleteOptions( grace_period_seconds=0, propagation_policy="Background" ) - v1_status = client.CoreV1Api().delete_namespace( + v1_status = client.CoreV1Api(api_client=kubernetes_client).delete_namespace( name=self.kernel_namespace, body=body ) status = None @@ -239,7 +248,7 @@ def _determine_kernel_namespace(self, **kwargs: dict[str, Any] | None) -> str: # If KERNEL_NAMESPACE was provided, 
then we assume it already exists. If not provided, then we'll # create the namespace and record that we'll want to delete it as well. - namespace = kwargs["env"].get("KERNEL_NAMESPACE") + namespace = os.environ.get("KERNEL_NAMESPACE") if namespace is None: # check if share gateway namespace is configured... if share_gateway_namespace: # if so, set to EG namespace @@ -283,10 +292,17 @@ def _create_kernel_namespace(self, service_account_name: str) -> str: # create the namespace try: - client.CoreV1Api().create_namespace(body=body) + client.CoreV1Api(api_client=kubernetes_client).create_namespace(body=body) self.delete_kernel_namespace = True self.log.info(f"Created kernel namespace: {namespace}") + # If remote cluster is being used, service account may not be present, create before role binding + # If creating service account is disabled, operator must manually create svc account + if is_env_true('EG_USE_REMOTE_CLUSTER') and is_env_true('EG_CREATE_REMOTE_SVC_ACCOUNT'): + self._create_service_account_if_not_exists( + namespace=namespace, service_account_name=service_account_name + ) + # Now create a RoleBinding for this namespace for the default ServiceAccount. We'll reference # the ClusterRole, but that will only be applied for this namespace. This prevents the need for # creating a role each time. 
@@ -310,7 +326,9 @@ def _create_kernel_namespace(self, service_account_name: str) -> str: body = client.V1DeleteOptions( grace_period_seconds=0, propagation_policy="Background" ) - client.CoreV1Api().delete_namespace(name=namespace, body=body) + client.CoreV1Api(api_client=kubernetes_client).delete_namespace( + name=namespace, body=body + ) self.log.warning(f"Deleted kernel namespace: {namespace}") else: reason = f"Error occurred creating namespace '{namespace}': {err}" @@ -318,6 +336,56 @@ def _create_kernel_namespace(self, service_account_name: str) -> str: return namespace + def _create_service_account_if_not_exists( + self, namespace: str, service_account_name: str + ) -> None: + """If service account doesn't exist in target cluster, create one. Occurs if a remote cluster is being used.""" + service_account_list_in_namespace: client.V1ServiceAccountList = client.CoreV1Api( + api_client=kubernetes_client + ).list_namespaced_service_account(namespace=namespace) + + service_accounts_in_namespace: list[ + client.V1ServiceAccount + ] = service_account_list_in_namespace.items + service_account_names_in_namespace: list[str] = [ + svcaccount.metadata.name for svcaccount in service_accounts_in_namespace + ] + + if service_account_name not in service_account_names_in_namespace: + service_account_metadata = {"name": service_account_name} + service_account_to_create: client.V1ServiceAccount = client.V1ServiceAccount( + kind="ServiceAccount", metadata=service_account_metadata + ) + + client.CoreV1Api(api_client=kubernetes_client).create_namespaced_service_account( + namespace=namespace, body=service_account_to_create + ) + + self.log.info( + f"Created service account {service_account_name} in namespace {namespace}" + ) + + def _create_role_if_not_exists(self, namespace: str) -> None: + """If role doesn't exist in target cluster, create one. 
Occurs if a remote cluster is being used""" + role_yaml_path = os.getenv('EG_REMOTE_CLUSTER_ROLE_PATH') + + # Get Roles in remote cluster + remote_cluster_roles: client.V1RoleList = client.RbacAuthorizationV1Api( + api_client=kubernetes_client + ).list_namespaced_role(namespace=namespace) + remote_cluster_role_names = [role.metadata.name for role in remote_cluster_roles.items] + + # If the kernel Role does not exist in the remote cluster. + if kernel_cluster_role not in remote_cluster_role_names: + with open(role_yaml_path) as f: + role_yaml = yaml.safe_load(f) + role_yaml["metadata"]["namespace"] = namespace + create_from_yaml_single_item(yml_object=role_yaml, k8s_client=kubernetes_client) + + self.log.info(f"Created role {kernel_cluster_role} in namespace {namespace}") + else: + self.log.info(f"Found role {kernel_cluster_role} in namespace {namespace}") + def _create_role_binding(self, namespace: str, service_account_name: str) -> None: # Creates RoleBinding instance for the given namespace. The role used will be the ClusterRole named by # EG_KERNEL_CLUSTER_ROLE. 
@@ -330,9 +398,17 @@ def _create_role_binding(self, namespace: str, service_account_name: str) -> Non role_binding_name = kernel_cluster_role # use same name for binding as cluster role labels = {"app": "enterprise-gateway", "component": "kernel", "kernel_id": self.kernel_id} binding_metadata = client.V1ObjectMeta(name=role_binding_name, labels=labels) - binding_role_ref = client.V1RoleRef( - api_group="", kind="ClusterRole", name=kernel_cluster_role - ) + + # If remote cluster is used, we need to create a role on that cluster + if is_env_true('EG_USE_REMOTE_CLUSTER'): + self._create_role_if_not_exists(namespace=namespace) + # We use namespaced roles on remote clusters rather than a ClusterRole + binding_role_ref = client.V1RoleRef(api_group="", kind="Role", name=kernel_cluster_role) + else: + binding_role_ref = client.V1RoleRef( + api_group="", kind="ClusterRole", name=kernel_cluster_role + ) + binding_subjects = client.V1Subject( api_group="", kind="ServiceAccount", name=service_account_name, namespace=namespace ) @@ -344,7 +420,7 @@ def _create_role_binding(self, namespace: str, service_account_name: str) -> Non subjects=[binding_subjects], ) - client.RbacAuthorizationV1Api().create_namespaced_role_binding( + client.RbacAuthorizationV1Api(api_client=kubernetes_client).create_namespaced_role_binding( namespace=namespace, body=body ) self.log.info( diff --git a/enterprise_gateway/services/processproxies/k8s_client.py b/enterprise_gateway/services/processproxies/k8s_client.py new file mode 100644 index 00000000..369aebd6 --- /dev/null +++ b/enterprise_gateway/services/processproxies/k8s_client.py @@ -0,0 +1,5 @@ +"""Instantiates a static global factory and a single atomic client""" +from enterprise_gateway.services.processproxies.k8s_client_factory import KubernetesClientFactory + +KUBERNETES_CLIENT_FACTORY = KubernetesClientFactory() +kubernetes_client = KUBERNETES_CLIENT_FACTORY.get_kubernetes_client() diff --git 
a/enterprise_gateway/services/processproxies/k8s_client_factory.py b/enterprise_gateway/services/processproxies/k8s_client_factory.py new file mode 100644 index 00000000..c0576cc6 --- /dev/null +++ b/enterprise_gateway/services/processproxies/k8s_client_factory.py @@ -0,0 +1,42 @@ +"""Contains factory to create kubernetes api client instances using a single configuration""" +import os + +from kubernetes import client, config +from traitlets.config import SingletonConfigurable + +from enterprise_gateway.services.utils.envutils import is_env_true + + +class KubernetesClientFactory(SingletonConfigurable): + """Manages kubernetes client creation from environment variables""" + + def get_kubernetes_client(self) -> client.ApiClient: + """Get kubernetes api client with appropriate configuration + Returns: + ApiClient: Kubernetes API client for appropriate cluster + """ + kubernetes_config: client.Configuration = client.Configuration() + if os.getenv("KUBERNETES_SERVICE_HOST"): + # Running inside cluster + if is_env_true('EG_USE_REMOTE_CLUSTER') and not is_env_true('EG_SHARED_NAMESPACE'): + kubeconfig_path = os.getenv( + 'EG_REMOTE_CLUSTER_KUBECONFIG_PATH', '/etc/kube/config/kubeconfig' + ) + context = os.getenv('EG_REMOTE_CLUSTER_CONTEXT', None) + config.load_kube_config( + client_configuration=kubernetes_config, + config_file=kubeconfig_path, + context=context, + ) + else: + if is_env_true('EG_USE_REMOTE_CLUSTER'): + self.log.warning( + "Cannot use EG_USE_REMOTE_CLUSTER and EG_SHARED_NAMESPACE at the same time. Using local cluster...." 
+                ) + + config.load_incluster_config(client_configuration=kubernetes_config) + else: + config.load_kube_config(client_configuration=kubernetes_config) + + self.log.debug(f"Created kubernetes client for host {kubernetes_config.host}") + return client.ApiClient(kubernetes_config) diff --git a/enterprise_gateway/services/utils/__init__.py b/enterprise_gateway/services/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/enterprise_gateway/services/utils/envutils.py b/enterprise_gateway/services/utils/envutils.py new file mode 100644 index 00000000..a8135edd --- /dev/null +++ b/enterprise_gateway/services/utils/envutils.py @@ -0,0 +1,7 @@ +"""Utilities to make checking environment variables easier""" +import os + + +def is_env_true(env_variable_name: str) -> bool: + """If environment variable is set and value is case-insensitively "true", then return true. Else return false""" + return bool(os.getenv(env_variable_name, "False").lower() == "true") diff --git a/etc/docker/enterprise-gateway/Dockerfile b/etc/docker/enterprise-gateway/Dockerfile index b13a4c1c..23ef30ce 100644 --- a/etc/docker/enterprise-gateway/Dockerfile +++ b/etc/docker/enterprise-gateway/Dockerfile @@ -25,6 +25,20 @@ RUN apt update && apt install -yq curl openjdk-8-jdk ENV JAVA_HOME /usr/lib/jvm/java RUN ln -s $(readlink -f /usr/bin/javac | sed "s:/bin/javac::") ${JAVA_HOME} +RUN curl https://apt.releases.teleport.dev/gpg \ +-o /usr/share/keyrings/teleport-archive-keyring.asc + +RUN source /etc/os-release +RUN echo "deb [signed-by=/usr/share/keyrings/teleport-archive-keyring.asc] \ +https://apt.releases.teleport.dev/ubuntu jammy stable/v11" \ +| tee /etc/apt/sources.list.d/teleport.list > /dev/null + +RUN apt-get update && apt-get install -y teleport + +# Download and install kubectl +RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" +RUN sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl + # 
Download and install Spark RUN curl -s https://archive.apache.org/dist/spark/spark-${SPARK_VER}/spark-${SPARK_VER}-bin-hadoop2.7.tgz | \ tar -xz -C /opt && \ diff --git a/etc/kernel-launchers/kubernetes/scripts/launch_kubernetes.py b/etc/kernel-launchers/kubernetes/scripts/launch_kubernetes.py index 491629ce..eb10aaf4 100644 --- a/etc/kernel-launchers/kubernetes/scripts/launch_kubernetes.py +++ b/etc/kernel-launchers/kubernetes/scripts/launch_kubernetes.py @@ -8,9 +8,11 @@ import urllib3 import yaml from jinja2 import Environment, FileSystemLoader, select_autoescape -from kubernetes import client, config +from kubernetes import client from kubernetes.client.rest import ApiException +from enterprise_gateway.services.processproxies.k8s_client import kubernetes_client + urllib3.disable_warnings() KERNEL_POD_TEMPLATE_PATH = "/kernel-pod.yaml.j2" @@ -96,12 +98,6 @@ def launch_kubernetes_kernel( kernel_class_name, ): """Launches a containerized kernel as a kubernetes pod.""" - - if os.getenv("KUBERNETES_SERVICE_HOST"): - config.load_incluster_config() - else: - config.load_kube_config() - # Capture keywords and their values. 
keywords = {} @@ -154,13 +150,13 @@ def launch_kubernetes_kernel( pod_template = extend_pod_env(k8s_obj) if pod_template_file is None: try: - pod_created = client.CoreV1Api(client.ApiClient()).create_namespaced_pod( - body=k8s_obj, namespace=kernel_namespace - ) + pod_created = client.CoreV1Api( + api_client=kubernetes_client + ).create_namespaced_pod(body=k8s_obj, namespace=kernel_namespace) except ApiException as exc: if _parse_k8s_exception(exc) == K8S_ALREADY_EXIST_REASON: pod_created = ( - client.CoreV1Api(client.ApiClient()) + client.CoreV1Api(api_client=kubernetes_client) .list_namespaced_pod( namespace=kernel_namespace, label_selector=f"kernel_id={kernel_id}", @@ -172,14 +168,14 @@ def launch_kubernetes_kernel( raise exc elif k8s_obj["kind"] == "Secret": if pod_template_file is None: - client.CoreV1Api(client.ApiClient()).create_namespaced_secret( + client.CoreV1Api(api_client=kubernetes_client).create_namespaced_secret( body=k8s_obj, namespace=kernel_namespace ) elif k8s_obj["kind"] == "PersistentVolumeClaim": if pod_template_file is None: try: client.CoreV1Api( - client.ApiClient() + api_client=kubernetes_client ).create_namespaced_persistent_volume_claim( body=k8s_obj, namespace=kernel_namespace ) @@ -190,7 +186,9 @@ def launch_kubernetes_kernel( raise exc elif k8s_obj["kind"] == "PersistentVolume": if pod_template_file is None: - client.CoreV1Api(client.ApiClient()).create_persistent_volume(body=k8s_obj) + client.CoreV1Api(api_client=kubernetes_client).create_persistent_volume( + body=k8s_obj + ) elif k8s_obj["kind"] == "Service": if pod_template_file is None and pod_created is not None: # Create dependency between pod and service, useful to delete service when kernel stops @@ -202,7 +200,7 @@ def launch_kubernetes_kernel( "uid": str(pod_created.metadata.uid), } ] - client.CoreV1Api(client.ApiClient()).create_namespaced_service( + client.CoreV1Api(api_client=kubernetes_client).create_namespaced_service( body=k8s_obj, namespace=kernel_namespace ) elif 
k8s_obj["kind"] == "ConfigMap": @@ -216,7 +214,7 @@ def launch_kubernetes_kernel( "uid": str(pod_created.metadata.uid), } ] - client.CoreV1Api(client.ApiClient()).create_namespaced_config_map( + client.CoreV1Api(api_client=kubernetes_client).create_namespaced_config_map( body=k8s_obj, namespace=kernel_namespace ) else: diff --git a/etc/kernel-launchers/operators/scripts/launch_custom_resource.py b/etc/kernel-launchers/operators/scripts/launch_custom_resource.py index 371d18b2..e4e4adc1 100644 --- a/etc/kernel-launchers/operators/scripts/launch_custom_resource.py +++ b/etc/kernel-launchers/operators/scripts/launch_custom_resource.py @@ -7,7 +7,9 @@ import urllib3 import yaml from jinja2 import Environment, FileSystemLoader, select_autoescape -from kubernetes import client, config +from kubernetes import client + +from enterprise_gateway.services.processproxies.k8s_client import kubernetes_client urllib3.disable_warnings() @@ -57,8 +59,7 @@ def launch_custom_resource_kernel( kernel_id, port_range, response_addr, public_key, spark_context_init_mode ): """Launch a custom resource kernel.""" - config.load_incluster_config() - + keywords = {} keywords = {} keywords["eg_port_range"] = port_range @@ -87,7 +88,7 @@ def launch_custom_resource_kernel( extend_operator_env(custom_resource_object, "executor") try: - client.CustomObjectsApi().create_namespaced_custom_object( + client.CustomObjectsApi(api_client=kubernetes_client).create_namespaced_custom_object( group, version, kernel_namespace, plural, custom_resource_object ) except client.exceptions.ApiException as ex: diff --git a/etc/kubernetes/helm/enterprise-gateway/templates/deployment.yaml b/etc/kubernetes/helm/enterprise-gateway/templates/deployment.yaml index 4cc7371a..dda748fa 100644 --- a/etc/kubernetes/helm/enterprise-gateway/templates/deployment.yaml +++ b/etc/kubernetes/helm/enterprise-gateway/templates/deployment.yaml @@ -21,6 +21,9 @@ spec: gateway-selector: enterprise-gateway template: metadata: + annotations: 
+ # Force redeploy when configmap updates + checksum/config: {{ include (print $.Template.BasePath "/kubeconfig-configmap.yaml") . | sha256sum }} labels: gateway-selector: enterprise-gateway app: enterprise-gateway @@ -41,6 +44,25 @@ spec: volumeMounts: - name: image-kernelspecs mountPath: "/usr/local/share/jupyter/kernels" + {{- if .Values.externalCluster.enabled }} + - name: kubeconfig + readOnly: true + mountPath: {{ .Values.externalCluster.configPath }} + {{- end}} + volumes: + {{- if .Values.externalCluster.enabled }} + - name: kubeconfig + configMap: + name: {{ .Release.Name }}-configmap + {{- end}} + env: + # Set environment variables to point k8s client to kubeconfig + {{- if .Values.externalCluster.enabled }} + - name: EG_USE_REMOTE_CLUSTER + value: "True" + - name: EG_REMOTE_CLUSTER_KUBECONFIG_PATH + value: {{ .Values.externalCluster.configPath }}{{ .Values.externalCluster.configFilename }} + {{- end }} {{- end }} containers: - name: enterprise-gateway @@ -75,6 +97,31 @@ spec: value: {{ toJson .Values.kernel.allowedKernels | squote }} - name: EG_DEFAULT_KERNEL_NAME value: {{ .Values.kernel.defaultKernelName }} + {{- if .Values.kernel.serviceAccountName }} + - name: EG_DEFAULT_KERNEL_SERVICE_ACCOUNT_NAME + value: {{ .Values.kernel.serviceAccountName }} + {{- end }} + {{- if .Values.kernel.namespace }} + - name: KERNEL_NAMESPACE + value: {{ .Values.kernel.namespace }} + {{- end }} + # Set environment variables to point k8s client to kubeconfig + {{- if .Values.externalCluster.enabled }} + - name: EG_USE_REMOTE_CLUSTER + value: "True" + - name: EG_REMOTE_CLUSTER_KUBECONFIG_PATH + value: {{ .Values.externalCluster.configPath }}{{ .Values.externalCluster.configFilename }} + {{- if .Values.externalCluster.context }} + - name: EG_REMOTE_CLUSTER_CONTEXT + value: {{ .Values.externalCluster.context }} + {{- end}} + {{- if .Values.externalCluster.autoConfigureRemote }} + - name: EG_CREATE_REMOTE_SVC_ACCOUNT + value: "True" + - name: EG_REMOTE_CLUSTER_ROLE_PATH + 
value: "/etc/config/role/role.yaml" + {{- end }} + {{- end }} # Optional authorization token passed in all requests {{- if .Values.authToken }} - name: EG_AUTH_TOKEN @@ -103,27 +150,87 @@ spec: volumeMounts: - name: nfs-kernelspecs mountPath: "/usr/local/share/jupyter/kernels" + {{- if .Values.externalCluster.enabled }} + - name: kubeconfig + readOnly: true + mountPath: {{ .Values.externalCluster.configPath }} + {{- end}} + {{- if .Values.externalCluster.autoConfigureRemote }} + - name: role + readOnly: true + mountPath: "/etc/config/role" + {{- end }} volumes: - name: nfs-kernelspecs nfs: server: {{ .Values.nfs.internalServerIPAddress }} path: "/usr/local/share/jupyter/kernels" + {{- if .Values.externalCluster.enabled }} + - name: kubeconfig + configMap: + name: {{ .Release.Name }}-configmap + {{- end}} + {{- if .Values.externalCluster.autoConfigureRemote }} + - name: role + configMap: + name: {{ .Release.Name }}-configmap-role + {{- end }} {{- else if .Values.kernelspecsPvc.enabled }} volumeMounts: - name: pvc-kernelspecs mountPath: "/usr/local/share/jupyter/kernels" + {{- if .Values.externalCluster.enabled }} + - name: kubeconfig + readOnly: true + mountPath: {{ .Values.externalCluster.configPath }} + {{- end}} + {{- if .Values.externalCluster.autoConfigureRemote }} + - name: role + readOnly: true + mountPath: "/etc/config/role" + {{- end }} volumes: - name: pvc-kernelspecs persistentVolumeClaim: claimName: {{ .Values.kernelspecsPvc.name }} + {{- if .Values.externalCluster.enabled }} + - name: kubeconfig + configMap: + name: {{ .Release.Name }}-configmap + {{- end}} + {{- if .Values.externalCluster.autoConfigureRemote }} + - name: role + configMap: + name: {{ .Release.Name }}-configmap-role + {{- end }} {{- else if .Values.kernelspecs.image }} volumeMounts: - name: image-kernelspecs mountPath: "/usr/local/share/jupyter/kernels" + {{- if .Values.externalCluster.enabled }} + - name: kubeconfig + readOnly: true + mountPath: {{ .Values.externalCluster.configPath }} + 
{{- end}} + {{- if .Values.externalCluster.autoConfigureRemote }} + - name: role + readOnly: true + mountPath: "/etc/config/role" + {{- end }} volumes: - name: image-kernelspecs emptyDir: medium: Memory + {{- if .Values.externalCluster.enabled }} + - name: kubeconfig + configMap: + name: {{ .Release.Name }}-configmap + {{- end}} + {{- if .Values.externalCluster.autoConfigureRemote }} + - name: role + configMap: + name: {{ .Release.Name }}-configmap-role + {{- end }} {{- if .Values.deployment.tolerations }} tolerations: diff --git a/etc/kubernetes/helm/enterprise-gateway/templates/eg-clusterrole.yaml b/etc/kubernetes/helm/enterprise-gateway/templates/eg-clusterrole.yaml index 11a0abac..4e2c2005 100644 --- a/etc/kubernetes/helm/enterprise-gateway/templates/eg-clusterrole.yaml +++ b/etc/kubernetes/helm/enterprise-gateway/templates/eg-clusterrole.yaml @@ -23,6 +23,12 @@ rules: - apiGroups: ["sparkoperator.k8s.io"] resources: ["sparkapplications", "sparkapplications/status", "scheduledsparkapplications", "scheduledsparkapplications/status"] verbs: ["get", "watch", "list", "create", "delete"] + # If externalCluster used, need to be able to read and forward service account + {{- if .Values.externalCluster.enabled }} + - apiGroups: ["rbac.authorization.k8s.io"] + resources: ["clusterroles"] + verbs: ["get", "create"] + {{- end}} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/etc/kubernetes/helm/enterprise-gateway/templates/kernel-role.yaml b/etc/kubernetes/helm/enterprise-gateway/templates/kernel-role.yaml new file mode 100644 index 00000000..da975bb5 --- /dev/null +++ b/etc/kubernetes/helm/enterprise-gateway/templates/kernel-role.yaml @@ -0,0 +1,25 @@ +{{- define "kernel-role" }} +{{- if .Values.externalCluster.autoConfigureRemote }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + # Referenced by EG_KERNEL_CLUSTER_ROLE in the Deployment + name: kernel-controller + labels: + app: enterprise-gateway + component: kernel + 
chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +rules: + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "watch", "list", "create", "delete"] + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["list", "create"] + - apiGroups: [""] + resources: ["services", "persistentvolumeclaims"] + verbs: ["list"] +{{- end }} +{{- end }} diff --git a/etc/kubernetes/helm/enterprise-gateway/templates/kubeconfig-configmap.yaml b/etc/kubernetes/helm/enterprise-gateway/templates/kubeconfig-configmap.yaml new file mode 100644 index 00000000..cc011f7b --- /dev/null +++ b/etc/kubernetes/helm/enterprise-gateway/templates/kubeconfig-configmap.yaml @@ -0,0 +1,8 @@ +{{- if .Values.externalCluster.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-configmap +data: + {{- (tpl (.Files.Glob "config/*").AsConfig . ) | nindent 2 }} +{{- end }} diff --git a/etc/kubernetes/helm/enterprise-gateway/templates/role-configmap.yaml b/etc/kubernetes/helm/enterprise-gateway/templates/role-configmap.yaml new file mode 100644 index 00000000..27520336 --- /dev/null +++ b/etc/kubernetes/helm/enterprise-gateway/templates/role-configmap.yaml @@ -0,0 +1,11 @@ +{{- if .Values.externalCluster.autoConfigureRemote }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-configmap-role +data: + # Pass in Role from helm chart into configmap that k8s client uses to create role in remote cluster + # Allows for Role permissions to be defined via helm chart, not source code + role.yaml: | + {{- include "kernel-role" . 
| indent 4 }} +{{- end }} diff --git a/etc/kubernetes/helm/enterprise-gateway/values.yaml b/etc/kubernetes/helm/enterprise-gateway/values.yaml index 493bb3eb..a1196b0e 100644 --- a/etc/kubernetes/helm/enterprise-gateway/values.yaml +++ b/etc/kubernetes/helm/enterprise-gateway/values.yaml @@ -85,6 +85,10 @@ authToken: kernel: # Kernel cluster role created by this chart. clusterRole: kernel-controller + # Service Account to Use (optional) + serviceAccountName: + # Namespace to launch in (optional) + namespace: # Will start kernels in the same namespace as EG if True. shareGatewayNamespace: false # Timeout for kernel launching in seconds. @@ -180,3 +184,13 @@ kip: # requests: # cpu: 1 # memory: 1Gi + +externalCluster: + # Set to enable to point k8s client to remote cluster + enabled: false + # Mount config at this path inside pod + configPath: /etc/kube/config/ + # Filename of kubeconfig + configFilename: kubeconfig + # Automatically create service account and role in remote cluster + autoConfigureRemote: false