From 1e03ac1e65edc176022272e91eb8ef07aae57365 Mon Sep 17 00:00:00 2001 From: Onkar Trivedi Date: Thu, 22 Aug 2024 23:28:41 -0400 Subject: [PATCH] Add helm-chart for ray-job crd --- helm-chart/ray-job/.helmignore | 22 ++ helm-chart/ray-job/Chart.yaml | 5 + helm-chart/ray-job/README.md | 11 + helm-chart/ray-job/templates/_helpers.tpl | 55 +++ .../ray-job/templates/raycluster-job.yaml | 321 ++++++++++++++++++ helm-chart/ray-job/values.yaml | 258 ++++++++++++++ 6 files changed, 672 insertions(+) create mode 100644 helm-chart/ray-job/.helmignore create mode 100644 helm-chart/ray-job/Chart.yaml create mode 100644 helm-chart/ray-job/README.md create mode 100644 helm-chart/ray-job/templates/_helpers.tpl create mode 100644 helm-chart/ray-job/templates/raycluster-job.yaml create mode 100644 helm-chart/ray-job/values.yaml diff --git a/helm-chart/ray-job/.helmignore b/helm-chart/ray-job/.helmignore new file mode 100644 index 0000000000..50af031725 --- /dev/null +++ b/helm-chart/ray-job/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-chart/ray-job/Chart.yaml b/helm-chart/ray-job/Chart.yaml new file mode 100644 index 0000000000..dd551019f8 --- /dev/null +++ b/helm-chart/ray-job/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +description: A Helm chart for Kubernetes +name: ray-job +version: 1.1.0 +icon: https://github.com/ray-project/ray/raw/master/doc/source/images/ray_header_logo.png diff --git a/helm-chart/ray-job/README.md b/helm-chart/ray-job/README.md new file mode 100644 index 0000000000..0cb726ae1a --- /dev/null +++ b/helm-chart/ray-job/README.md @@ -0,0 +1,11 @@ +# RayJob + +RayJob is a custom resource definition (CRD). **KubeRay operator** will listen to the resource events about RayJob and create related Kubernetes resources (e.g. Pod & Service). Hence, **KubeRay operator** installation and **CRD** registration are required for this guide. + +## Prerequisites +See [kuberay-operator/README.md](https://github.com/ray-project/kuberay/blob/master/helm-chart/kuberay-operator/README.md) for more details. +* Helm +* Install custom resource definition and KubeRay operator (covered by the following end-to-end example.) + +## End-to-end example +Find full documentation and examples here: https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayjob.md diff --git a/helm-chart/ray-job/templates/_helpers.tpl b/helm-chart/ray-job/templates/_helpers.tpl new file mode 100644 index 0000000000..fbfed8fd25 --- /dev/null +++ b/helm-chart/ray-job/templates/_helpers.tpl @@ -0,0 +1,55 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "ray-job.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. 
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "ray-job.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "ray-job.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Common labels +*/}} +{{- define "ray-job.labels" -}} +helm.sh/chart: {{ include "ray-job.chart" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + +{{/* +Create the name of the service account to use +*/}} +{{- define "ray-job.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} + {{ default (include "ray-job.fullname" .) .Values.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.serviceAccount.name }} +{{- end -}} +{{- end -}} diff --git a/helm-chart/ray-job/templates/raycluster-job.yaml b/helm-chart/ray-job/templates/raycluster-job.yaml new file mode 100644 index 0000000000..8e6d3791f1 --- /dev/null +++ b/helm-chart/ray-job/templates/raycluster-job.yaml @@ -0,0 +1,321 @@ +apiVersion: ray.io/v1 +kind: RayJob +metadata: + labels: +{{ include "ray-job.labels" . | indent 4 }} + name: {{ include "ray-job.fullname" . 
}} + {{ if .Values.annotations }} + annotations: {{ toYaml .Values.annotations | nindent 4 }} + {{ end }} +spec: + entrypoint: {{ .Values.rayjob.entrypoint }} + shutdownAfterJobFinishes: {{ .Values.rayjob.shutdownAfterJobFinishes }} + ttlSecondsAfterFinished: {{ .Values.rayjob.ttlSecondsAfterFinished }} + runtimeEnvYAML: | + pip: + {{- if .Values.rayjob.pip }} + {{- range .Values.rayjob.pip }} + - {{ . }} + {{- end }} + {{- end }} + env_vars: + {{- if .Values.rayjob.env_vars }} + {{- range $key, $value := .Values.rayjob.env_vars }} + {{ $key }}: {{ $value }} + {{- end }} + {{- end }} + working_dir: {{ .Values.rayjob.working_dir }} + rayClusterSpec: + {{- if .Values.head.rayVersion }} + rayVersion: {{ .Values.head.rayVersion }} + {{- end }} + {{- if .Values.head.enableInTreeAutoscaling }} + enableInTreeAutoscaling: {{ .Values.head.enableInTreeAutoscaling }} + {{- end }} + {{- if .Values.head.autoscalerOptions }} + autoscalerOptions: {{- toYaml .Values.head.autoscalerOptions | nindent 6 }} + {{- end }} + headGroupSpec: + {{- if .Values.head.headService }} + headService: {{- toYaml .Values.head.headService | nindent 8 }} + {{- end }} + serviceType: {{ .Values.service.type }} + rayStartParams: + {{- if and (not .Values.head.rayStartParams) (not .Values.head.initArgs) }} + {} + {{- else }} + {{- range $key, $val := .Values.head.rayStartParams }} + {{ $key }}: {{ $val | quote }} + {{- end }} + {{- /* + initArgs is a deprecated alias for rayStartParams. 
+ */}} + {{- range $key, $val := .Values.head.initArgs }} + {{ $key }}: {{ $val | quote }} + {{- end }} + {{- end }} + template: + spec: + imagePullSecrets: {{- toYaml .Values.imagePullSecrets | nindent 12 }} + {{- if .Values.head.serviceAccountName }} + serviceAccountName: {{ .Values.head.serviceAccountName }} + {{- end }} + {{- if .Values.head.restartPolicy }} + restartPolicy: {{ .Values.head.restartPolicy }} + {{- end }} + {{- if .Values.head.initContainers }} + initContainers: {{- toYaml .Values.head.initContainers | nindent 12 }} + {{- end }} + containers: + - {{ if .Values.head.volumeMounts }} + volumeMounts: {{- toYaml .Values.head.volumeMounts | nindent 14 }} + {{- end }} + name: ray-head + {{- if .Values.head.image }} + image: {{ .Values.head.image.repository }}:{{ .Values.head.image.tag }} + imagePullPolicy: {{ .Values.head.image.pullPolicy }} + {{- else }} + image: {{ .Values.image.repository }}:{{ .Values.image.tag }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + resources: {{- toYaml .Values.head.resources | nindent 16 }} + securityContext: + {{- toYaml .Values.head.securityContext | nindent 16 }} + {{- with concat .Values.common.containerEnv .Values.head.containerEnv }} + env: + {{- toYaml . | nindent 16 }} + {{- end }} + {{- with .Values.head.envFrom }} + envFrom: {{- toYaml . 
| nindent 16}} + {{- end }} + {{- if .Values.head.ports }} + ports: {{- toYaml .Values.head.ports | nindent 16}} + {{- end }} + {{- if .Values.head.lifecycle }} + lifecycle: + {{- toYaml .Values.head.lifecycle | nindent 16 }} + {{- end }} + {{- if .Values.head.command }} + command: {{- toYaml .Values.head.command | nindent 16}} + {{- end }} + {{- if .Values.head.args }} + args: {{- toYaml .Values.head.args | nindent 16}} + {{- end }} + {{- if .Values.head.sidecarContainers }} + {{- toYaml .Values.head.sidecarContainers | nindent 12 }} + {{- end }} + {{ if .Values.head.volumes }} + volumes: {{- toYaml .Values.head.volumes | nindent 12 }} + {{- end }} + affinity: {{- toYaml .Values.head.affinity | nindent 12 }} + {{ if .Values.head.priorityClassName }} + priorityClassName: {{- toYaml .Values.head.priorityClassName | nindent 12 }} + {{- end }} + {{ if .Values.head.priority }} + priority: {{- toYaml .Values.head.priority | nindent 12 }} + {{- end }} + tolerations: {{- toYaml .Values.head.tolerations | nindent 12 }} + nodeSelector: {{- toYaml .Values.head.nodeSelector | nindent 12 }} + {{- with .Values.head.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + metadata: + annotations: {{- toYaml .Values.head.annotations | nindent 12 }} + {{- if .Values.head.labels }} + labels: {{- toYaml .Values.head.labels | nindent 12 }} + {{ include "ray-job.labels" . | indent 10 }} + {{ else }} + labels: {{ include "ray-job.labels" . | nindent 12 }} + {{- end }} + + workerGroupSpecs: + {{- range $groupName, $values := .Values.additionalWorkerGroups }} + {{- if ne $values.disabled true }} + - rayStartParams: + {{- if and (not $values.rayStartParams) (not $values.initArgs) }} + {} + {{- else }} + {{- range $key, $val := $values.rayStartParams }} + {{ $key }}: {{ $val | quote }} + {{- end }} + {{- /* + initArgs is a deprecated alias for rayStartParams. 
+ */}} + {{- range $key, $val := $values.initArgs }} + {{ $key }}: {{ $val | quote }} + {{- end }} + {{- end }} + replicas: {{ $values.replicas }} + minReplicas: {{ $values.minReplicas | default 0 }} + maxReplicas: {{ $values.maxReplicas | default 2147483647 }} + numOfHosts: {{ $values.numOfHosts | default 1 }} + groupName: {{ $groupName }} + template: + spec: + imagePullSecrets: {{- toYaml $.Values.imagePullSecrets | nindent 12 }} + {{- if $values.serviceAccountName }} + serviceAccountName: {{ $values.serviceAccountName }} + {{- end }} + {{- if $values.restartPolicy }} + restartPolicy: {{ $values.restartPolicy }} + {{- end }} + {{- if $values.initContainers }} + initContainers: {{- toYaml $values.initContainers | nindent 12 }} + {{- end }} + containers: + - {{ if $values.volumeMounts }} + volumeMounts: {{- toYaml $values.volumeMounts | nindent 14 }} + {{- end }} + name: ray-worker + {{- if $values.image }} + image: {{ $values.image.repository }}:{{ $values.image.tag }} + imagePullPolicy: {{ $values.image.pullPolicy }} + {{- else }} + image: {{ $.Values.image.repository }}:{{ $.Values.image.tag }} + imagePullPolicy: {{ $.Values.image.pullPolicy }} + {{- end }} + resources: {{- toYaml $values.resources | nindent 16 }} + securityContext: + {{- toYaml $values.securityContext | nindent 16 }} + {{- with concat $.Values.common.containerEnv $values.containerEnv }} + env: + {{- toYaml . 
| nindent 16 }} + {{- end }} + {{- if $values.envFrom }} + envFrom: {{- toYaml $values.envFrom | nindent 16 }} + {{- end }} + ports: {{- toYaml $values.ports | nindent 16}} + {{- if $values.lifecycle }} + lifecycle: + {{- toYaml $values.lifecycle | nindent 16 }} + {{- end }} + {{- if $values.command }} + command: {{- toYaml $values.command | nindent 16}} + {{- end }} + {{- if $values.args }} + args: {{- toYaml $values.args | nindent 16}} + {{- end }} + {{- if $values.sidecarContainers }} + {{- toYaml $values.sidecarContainers | nindent 12 }} + {{- end }} + {{ if $values.volumes }} + volumes: {{- toYaml $values.volumes | nindent 12 }} + {{- end }} + affinity: {{- toYaml $values.affinity | nindent 12 }} + {{ if $values.priorityClassName }} + priorityClassName: {{- toYaml $values.priorityClassName | nindent 12 }} + {{- end }} + {{ if $values.priority }} + priority: {{- toYaml $values.priority | nindent 12 }} + {{- end }} + tolerations: {{- toYaml $values.tolerations | nindent 12 }} + nodeSelector: {{- toYaml $values.nodeSelector | nindent 12 }} + {{- with $values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + metadata: + annotations: {{- toYaml $values.annotations | nindent 12 }} + {{- if $values.labels }} + labels: {{- toYaml $values.labels | nindent 12 }} + {{ include "ray-job.labels" $ | indent 10 }} + {{ else }} + labels: {{ include "ray-job.labels" $ | nindent 12 }} + {{- end }} + + {{- end }} + {{- end }} + {{- if ne (.Values.worker.disabled | default false) true }} + - rayStartParams: + {{- if and (not .Values.worker.rayStartParams) (not .Values.worker.initArgs) }} + {} + {{- else }} + {{- range $key, $val := .Values.worker.rayStartParams }} + {{ $key }}: {{ $val | quote }} + {{- end }} + {{- /* + initArgs is a deprecated alias for rayStartParams. 
+ */}} + {{- range $key, $val := .Values.worker.initArgs }} + {{ $key }}: {{ $val | quote }} + {{- end }} + {{- end }} + replicas: {{ .Values.worker.replicas }} + minReplicas: {{ .Values.worker.minReplicas | default 0 }} + maxReplicas: {{ .Values.worker.maxReplicas | default 2147483647 }} + numOfHosts: {{ .Values.worker.numOfHosts | default 1 }} + groupName: {{ .Values.worker.groupName }} + template: + spec: + imagePullSecrets: {{- toYaml .Values.imagePullSecrets | nindent 12 }} + {{- if .Values.worker.serviceAccountName }} + serviceAccountName: {{ .Values.worker.serviceAccountName }} + {{- end }} + {{- if .Values.worker.restartPolicy }} + restartPolicy: {{ .Values.worker.restartPolicy }} + {{- end }} + {{- if .Values.worker.initContainers }} + initContainers: {{- toYaml .Values.worker.initContainers | nindent 12 }} + {{- end }} + containers: + - {{ if .Values.worker.volumeMounts }} + volumeMounts: {{- toYaml .Values.worker.volumeMounts | nindent 14 }} + {{- end }} + name: ray-worker + {{- if .Values.worker.image }} + image: {{ .Values.worker.image.repository }}:{{ .Values.worker.image.tag }} + imagePullPolicy: {{ .Values.worker.image.pullPolicy }} + {{- else }} + image: {{ .Values.image.repository }}:{{ .Values.image.tag }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + resources: {{- toYaml .Values.worker.resources | nindent 16 }} + securityContext: + {{- toYaml .Values.worker.securityContext | nindent 16 }} + {{- with concat .Values.common.containerEnv .Values.worker.containerEnv }} + env: + {{- toYaml . | nindent 16 }} + {{- end }} + {{- with .Values.worker.envFrom }} + envFrom: {{- toYaml . 
| nindent 16}} + {{- end }} + ports: {{- toYaml .Values.worker.ports | nindent 16}} + {{- if .Values.worker.lifecycle }} + lifecycle: + {{- toYaml .Values.worker.lifecycle | nindent 16 }} + {{- end }} + {{- if .Values.worker.command }} + command: {{- toYaml .Values.worker.command | nindent 16}} + {{- end }} + {{- if .Values.worker.args }} + args: {{- toYaml .Values.worker.args | nindent 16}} + {{- end }} + {{- if .Values.worker.sidecarContainers }} + {{- toYaml .Values.worker.sidecarContainers | nindent 12 }} + {{- end }} + {{ if .Values.worker.volumes }} + volumes: {{- toYaml .Values.worker.volumes | nindent 12 }} + {{- end }} + affinity: {{- toYaml .Values.worker.affinity | nindent 12 }} + {{ if .Values.worker.priorityClassName }} + priorityClassName: {{- toYaml .Values.worker.priorityClassName | nindent 12 }} + {{- end }} + {{ if .Values.worker.priority }} + priority: {{- toYaml .Values.worker.priority | nindent 12 }} + {{- end }} + tolerations: {{- toYaml .Values.worker.tolerations | nindent 12 }} + nodeSelector: {{- toYaml .Values.worker.nodeSelector | nindent 12 }} + {{- with .Values.worker.podSecurityContext}} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + metadata: + annotations: {{- toYaml .Values.worker.annotations | nindent 12 }} + {{- if .Values.worker.labels }} + labels: {{- toYaml .Values.worker.labels | nindent 12 }} + {{ include "ray-job.labels" . | indent 10 }} + {{ else }} + labels: {{ include "ray-job.labels" $ | nindent 12 }} + {{- end }} + {{- end }} diff --git a/helm-chart/ray-job/values.yaml b/helm-chart/ray-job/values.yaml new file mode 100644 index 0000000000..817615433e --- /dev/null +++ b/helm-chart/ray-job/values.yaml @@ -0,0 +1,258 @@ +# Default values for ray-job. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# The KubeRay community welcomes PRs to expose additional configuration +# in this Helm chart. 
+
+rayjob:
+  shutdownAfterJobFinishes: true
+  ttlSecondsAfterFinished: 10
+  entrypoint: python -c 'import ray, os; ray.init(address=os.environ.get("INIT_ADDR")); print(ray.cluster_resources())'
+  # The chart template assembles runtimeEnvYAML from the pip, env_vars and working_dir values below.
+  pip:
+    - numpy
+  env_vars:
+    INIT_ADDR: auto
+  working_dir: ""
+
+image:
+  repository: rayproject/ray
+  tag: 2.34.0
+  pullPolicy: IfNotPresent
+
+nameOverride: "kuberay"
+fullnameOverride: ""
+
+imagePullSecrets: []
+  # - name: an-existing-secret
+
+# common defined values shared between the head and worker
+common:
+  # containerEnv specifies environment variables for the Ray head and worker containers.
+  # Follows standard K8s container env schema.
+  containerEnv: []
+  # - name: BLAH
+  #   value: VAL
+head:
+  # rayVersion determines the autoscaler's image version.
+  # It should match the Ray version in the image of the containers.
+  # rayVersion: 2.34.0
+  # If enableInTreeAutoscaling is true, the autoscaler sidecar will be added to the Ray head pod.
+  # Ray autoscaler integration is supported only for Ray versions >= 1.11.0
+  # Ray autoscaler integration is Beta with KubeRay >= 0.3.0 and Ray >= 2.0.0.
+  # enableInTreeAutoscaling: true
+  # autoscalerOptions is an OPTIONAL field specifying configuration overrides for the Ray autoscaler.
+  # The example configuration shown below represents the DEFAULT values.
+  # autoscalerOptions:
+  #   upscalingMode: Default
+  #   idleTimeoutSeconds is the number of seconds to wait before scaling down a worker pod which is not using Ray resources.
+  #   idleTimeoutSeconds: 60
+  #   imagePullPolicy optionally overrides the autoscaler container's default image pull policy (IfNotPresent).
+  #   imagePullPolicy: IfNotPresent
+  #   Optionally specify the autoscaler container's securityContext.
+  #   securityContext: {}
+  #   env: []
+  #   envFrom: []
+  # resources specifies optional resource request and limit overrides for the autoscaler container.
+  # For large Ray clusters, we recommend monitoring container resource usage to determine if overriding the defaults is required.
+  # resources:
+  #   limits:
+  #     cpu: "500m"
+  #     memory: "512Mi"
+  #   requests:
+  #     cpu: "500m"
+  #     memory: "512Mi"
+  labels: {}
+  # Note: From KubeRay v0.6.0, users need to create the ServiceAccount by themselves if they specify the `serviceAccountName`
+  # in the headGroupSpec. See https://github.com/ray-project/kuberay/pull/1128 for more details.
+  serviceAccountName: ""
+  restartPolicy: ""
+  rayStartParams:
+    dashboard-host: '0.0.0.0'
+  # containerEnv specifies environment variables for the Ray container,
+  # Follows standard K8s container env schema.
+  containerEnv: []
+  # - name: EXAMPLE_ENV
+  #   value: "1"
+  envFrom: []
+  # - secretRef:
+  #     name: my-env-secret
+  # ports optionally allows specifying ports for the Ray container.
+  # ports: []
+  # resource requests and limits for the Ray head container.
+  # Modify as needed for your application.
+  # Note that the resources in this example are much too small for production;
+  # we don't recommend allocating less than 8G memory for a Ray pod in production.
+  # Ray pods should be sized to take up entire K8s nodes when possible.
+  # Always set CPU and memory limits for Ray pods.
+  # It is usually best to set requests equal to limits.
+  # See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources
+  # for further guidance.
+  resources:
+    limits:
+      cpu: "1"
+      # To avoid out-of-memory issues, never allocate less than 2G memory for the Ray head.
+      memory: "2G"
+    requests:
+      cpu: "1"
+      memory: "2G"
+  annotations: {}
+  nodeSelector: {}
+  tolerations: []
+  affinity: {}
+  # Pod security context.
+  podSecurityContext: {}
+  # Ray container security context.
+  securityContext: {}
+  # Optional: The following volumes/volumeMounts configurations are optional but recommended because
+  # Ray writes logs to /tmp/ray/session_latest/logs instead of stdout/stderr.
+  volumes:
+    - name: log-volume
+      emptyDir: {}
+  volumeMounts:
+    - mountPath: /tmp/ray
+      name: log-volume
+  # sidecarContainers specifies additional containers to attach to the Ray pod.
+  # Follows standard K8s container spec.
+  sidecarContainers: []
+  # See docs/guidance/pod-command.md for more details about how to specify
+  # container command for head Pod.
+  command: []
+  args: []
+  # Optional, for the user to provide any additional fields to the service.
+  # See https://pkg.go.dev/k8s.io/Kubernetes/pkg/api/v1#Service
+  headService: {}
+  # metadata:
+  #   annotations:
+  #     prometheus.io/scrape: "true"
+
+
+worker:
+  # If you want to disable the default workergroup
+  # uncomment the line below
+  # disabled: true
+  groupName: workergroup
+  replicas: 1
+  minReplicas: 1
+  maxReplicas: 3
+  labels: {}
+  serviceAccountName: ""
+  restartPolicy: ""
+  rayStartParams: {}
+  # containerEnv specifies environment variables for the Ray container,
+  # Follows standard K8s container env schema.
+  containerEnv: []
+  # - name: EXAMPLE_ENV
+  #   value: "1"
+  envFrom: []
+  # - secretRef:
+  #     name: my-env-secret
+  # ports optionally allows specifying ports for the Ray container.
+  # ports: []
+  # resource requests and limits for the Ray worker container.
+  # Modify as needed for your application.
+  # Note that the resources in this example are much too small for production;
+  # we don't recommend allocating less than 8G memory for a Ray pod in production.
+  # Ray pods should be sized to take up entire K8s nodes when possible.
+  # Always set CPU and memory limits for Ray pods.
+  # It is usually best to set requests equal to limits.
+  # See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources
+  # for further guidance.
+  resources:
+    limits:
+      cpu: "1"
+      memory: "1G"
+    requests:
+      cpu: "1"
+      memory: "1G"
+  annotations: {}
+  nodeSelector: {}
+  tolerations: []
+  affinity: {}
+  # Pod security context.
+  podSecurityContext: {}
+  # Ray container security context.
+  securityContext: {}
+  # Optional: The following volumes/volumeMounts configurations are optional but recommended because
+  # Ray writes logs to /tmp/ray/session_latest/logs instead of stdout/stderr.
+  volumes:
+    - name: log-volume
+      emptyDir: {}
+  volumeMounts:
+    - mountPath: /tmp/ray
+      name: log-volume
+  # sidecarContainers specifies additional containers to attach to the Ray pod.
+  # Follows standard K8s container spec.
+  sidecarContainers: []
+  # See docs/guidance/pod-command.md for more details about how to specify
+  # container command for worker Pod.
+  command: []
+  args: []
+
+# The map's key is used as the groupName.
+# For example, key:small-group in the map below
+# will be used as the groupName
+additionalWorkerGroups:
+  smallGroup:
+    # Disabled by default
+    disabled: true
+    replicas: 0
+    minReplicas: 0
+    maxReplicas: 3
+    labels: {}
+    serviceAccountName: ""
+    restartPolicy: ""
+    rayStartParams: {}
+    # containerEnv specifies environment variables for the Ray container,
+    # Follows standard K8s container env schema.
+    containerEnv: []
+    # - name: EXAMPLE_ENV
+    #   value: "1"
+    envFrom: []
+    # - secretRef:
+    #     name: my-env-secret
+    # ports optionally allows specifying ports for the Ray container.
+    # ports: []
+    # resource requests and limits for the Ray worker container.
+    # Modify as needed for your application.
+    # Note that the resources in this example are much too small for production;
+    # we don't recommend allocating less than 8G memory for a Ray pod in production.
+    # Ray pods should be sized to take up entire K8s nodes when possible.
+    # Always set CPU and memory limits for Ray pods.
+    # It is usually best to set requests equal to limits.
+    # See https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html#resources
+    # for further guidance.
+    resources:
+      limits:
+        cpu: 1
+        memory: "1G"
+      requests:
+        cpu: 1
+        memory: "1G"
+    annotations: {}
+    nodeSelector: {}
+    tolerations: []
+    affinity: {}
+    # Pod security context.
+    podSecurityContext: {}
+    # Ray container security context.
+    securityContext: {}
+    # Optional: The following volumes/volumeMounts configurations are optional but recommended because
+    # Ray writes logs to /tmp/ray/session_latest/logs instead of stdout/stderr.
+    volumes:
+      - name: log-volume
+        emptyDir: {}
+    volumeMounts:
+      - mountPath: /tmp/ray
+        name: log-volume
+    sidecarContainers: []
+    # See docs/guidance/pod-command.md for more details about how to specify
+    # container command for worker Pod.
+    command: []
+    args: []
+
+# Configuration for Head's Kubernetes Service
+service:
+  # This is optional, and the default is ClusterIP.
+  type: ClusterIP