Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add security context to Ray clusters #281

Merged
merged 9 commits into from
Mar 8, 2024
12 changes: 8 additions & 4 deletions modules/kuberay-cluster/kuberay-autopilot-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ head:
autoscalerOptions:
upscalingMode: Default
idleTimeoutSeconds: 60
securityContext: {}
securityContext:
${indent(4, security_context)}
env: []
envFrom: []
# resources specifies optional resource request and limit overrides for the autoscaler container.
Expand Down Expand Up @@ -111,7 +112,8 @@ head:
tolerations: []
affinity: {}
# Ray container security context.
securityContext: {}
securityContext:
${indent(4, security_context)}
volumes:
- name: ray-logs
emptyDir: {}
Expand Down Expand Up @@ -213,7 +215,8 @@ additionalWorkerGroups:
tolerations: []
affinity: {}
# Ray container security context.
securityContext: {}
securityContext:
${indent(4, security_context)}
volumes:
- name: ray-logs
emptyDir: {}
Expand Down Expand Up @@ -309,7 +312,8 @@ additionalWorkerGroups:
tolerations: []
affinity: {}
# Ray container security context.
securityContext: {}
securityContext:
${indent(4, security_context)}
volumes:
- name: ray-logs
emptyDir: {}
Expand Down
9 changes: 6 additions & 3 deletions modules/kuberay-cluster/kuberay-gpu-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ head:
tolerations: []
affinity: {}
# Ray container security context.
securityContext: {}
securityContext:
${indent(4, security_context)}
volumes:
- name: ray-logs
emptyDir: {}
Expand Down Expand Up @@ -202,7 +203,8 @@ worker:
tolerations: []
affinity: {}
# Ray container security context.
securityContext: {}
securityContext:
${indent(4, security_context)}
volumes:
- name: ray-logs
emptyDir: {}
Expand Down Expand Up @@ -291,7 +293,8 @@ additionalWorkerGroups:
tolerations: []
affinity: {}
# Ray container security context.
securityContext: {}
securityContext:
${indent(4, security_context)}
volumes:
- name: ray-logs
emptyDir: {}
Expand Down
6 changes: 4 additions & 2 deletions modules/kuberay-cluster/kuberay-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ head:
tolerations: []
affinity: {}
# Ray container security context.
securityContext: {}
securityContext:
${indent(4, security_context)}
volumes:
- name: ray-logs
emptyDir: {}
Expand Down Expand Up @@ -196,7 +197,8 @@ worker:
tolerations: []
affinity: {}
# Ray container security context.
securityContext: {}
securityContext:
${indent(4, security_context)}
volumes:
- name: ray-logs
emptyDir: {}
Expand Down
4 changes: 4 additions & 0 deletions modules/kuberay-cluster/kuberay.tf
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,22 @@ resource "helm_release" "ray-cluster" {
gcs_bucket = var.gcs_bucket
k8s_service_account = var.google_service_account
grafana_host = var.grafana_host
security_context = chomp(yamlencode({for k, v in var.security_context : k => v if v != null }))
}) : var.enable_tpu ? templatefile("${path.module}/kuberay-tpu-values.yaml", {
gcs_bucket = var.gcs_bucket
k8s_service_account = var.google_service_account
grafana_host = var.grafana_host
security_context = chomp(yamlencode({for k, v in var.security_context : k => v if v != null }))
}) : var.enable_gpu ? templatefile("${path.module}/kuberay-gpu-values.yaml", {
gcs_bucket = var.gcs_bucket
k8s_service_account = var.google_service_account
grafana_host = var.grafana_host
security_context = chomp(yamlencode({for k, v in var.security_context : k => v if v != null }))
}) : templatefile("${path.module}/kuberay-values.yaml", {
gcs_bucket = var.gcs_bucket
k8s_service_account = var.google_service_account
grafana_host = var.grafana_host
security_context = chomp(yamlencode({for k, v in var.security_context : k => v if v != null }))
})
]
}
Expand Down
52 changes: 52 additions & 0 deletions modules/kuberay-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,55 @@ variable "gcs_bucket" {
variable "grafana_host" {
type = string
}

# Container security context rendered into the Ray cluster Helm values.
#
# Only top-level null attributes are filtered out before this object is YAML
# encoded, so rarely used nested attributes are left commented out below to
# avoid emitting stray `null` entries. Add them back once nested null values
# can be stripped as well.
variable "security_context" {
  description = "Kubernetes security context to set on all ray cluster pods"
  type = object({
    allowPrivilegeEscalation = optional(bool)
    capabilities = optional(object({
      # Not typically used
      # add = optional(list(string))
      drop = optional(list(string))
    }))
    privileged             = optional(bool)
    procMount              = optional(string)
    readOnlyRootFilesystem = optional(bool)
    runAsGroup             = optional(number)
    runAsNonRoot           = optional(bool)
    runAsUser              = optional(number)
    seLinuxOptions = optional(object({
      level = optional(string)
      role  = optional(string)
      type  = optional(string)
      user  = optional(string)
    }))
    seccompProfile = optional(object({
      # Not typically used
      # localhostProfile = optional(string)
      type = optional(string)
    }))
    # Every Windows option is optional, matching the Kubernetes API; callers
    # that already set hostProcess and runAsUserName are unaffected.
    windowsOptions = optional(object({
      gmsaCredentialSpec     = optional(string)
      gmsaCredentialSpecName = optional(string)
      hostProcess            = optional(bool)
      runAsUserName          = optional(string)
    }))
  })

  default = {
    allowPrivilegeEscalation = false
    capabilities = {
      drop = ["ALL"]
    }
    # GKE will automatically mount GPUs and TPUs into unprivileged pods
    privileged = false
    # The key must match the attribute declared in the type above. Keys that
    # are not part of the object type are discarded during type conversion, so
    # a misspelled name silently disables this setting.
    # NOTE(review): with a read-only root filesystem, every path Ray writes to
    # must be backed by a mounted volume - confirm before rolling out.
    readOnlyRootFilesystem = true
    runAsNonRoot           = true
    seccompProfile = {
      type = "RuntimeDefault"
    }
  }
}
Loading