Skip to content

Commit

Permalink
Add security context to Ray clusters (#281)
Browse files Browse the repository at this point in the history
* Add security context to Ray clusters

* Switch to using local for security_context
  • Loading branch information
bjornsen authored Mar 8, 2024
1 parent 68dd0f8 commit 2871e74
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 15 deletions.
10 changes: 5 additions & 5 deletions modules/gcs/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
# limitations under the License.

# Cloud Storage bucket managed by this module. Name and location are supplied
# by the caller through var.bucket_name and var.region.
resource "google_storage_bucket" "static" {
  name          = var.bucket_name
  location      = var.region
  storage_class = "STANDARD"

  # Access is controlled by bucket-level IAM only (no per-object ACLs).
  uniform_bucket_level_access = true

  # Lets Terraform delete the bucket even if it still contains objects.
  force_destroy = true

  # Reject any attempt to expose the bucket or its objects publicly.
  public_access_prevention = "enforced"
}
12 changes: 8 additions & 4 deletions modules/kuberay-cluster/kuberay-autopilot-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ head:
autoscalerOptions:
upscalingMode: Default
idleTimeoutSeconds: 60
securityContext: {}
securityContext:
${indent(4, security_context)}
env: []
envFrom: []
# resources specifies optional resource request and limit overrides for the autoscaler container.
Expand Down Expand Up @@ -113,7 +114,8 @@ head:
tolerations: []
affinity: {}
# Ray container security context.
securityContext: {}
securityContext:
${indent(4, security_context)}
volumes:
- name: ray-logs
emptyDir: {}
Expand Down Expand Up @@ -222,7 +224,8 @@ additionalWorkerGroups:
tolerations: []
affinity: {}
# Ray container security context.
securityContext: {}
securityContext:
${indent(4, security_context)}
volumes:
- name: ray-logs
emptyDir: {}
Expand Down Expand Up @@ -327,7 +330,8 @@ additionalWorkerGroups:
tolerations: []
affinity: {}
# Ray container security context.
securityContext: {}
securityContext:
${indent(4, security_context)}
volumes:
- name: ray-logs
emptyDir: {}
Expand Down
9 changes: 6 additions & 3 deletions modules/kuberay-cluster/kuberay-gpu-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ head:
tolerations: []
affinity: {}
# Ray container security context.
securityContext: {}
securityContext:
${indent(4, security_context)}
volumes:
- name: ray-logs
emptyDir: {}
Expand Down Expand Up @@ -215,7 +216,8 @@ worker:
tolerations: []
affinity: {}
# Ray container security context.
securityContext: {}
securityContext:
${indent(4, security_context)}
volumes:
- name: ray-logs
emptyDir: {}
Expand Down Expand Up @@ -311,7 +313,8 @@ additionalWorkerGroups:
tolerations: []
affinity: {}
# Ray container security context.
securityContext: {}
securityContext:
${indent(4, security_context)}
volumes:
- name: ray-logs
emptyDir: {}
Expand Down
6 changes: 4 additions & 2 deletions modules/kuberay-cluster/kuberay-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ head:
tolerations: []
affinity: {}
# Ray container security context.
securityContext: {}
securityContext:
${indent(4, security_context)}
volumes:
- name: ray-logs
emptyDir: {}
Expand Down Expand Up @@ -209,7 +210,8 @@ worker:
tolerations: []
affinity: {}
# Ray container security context.
securityContext: {}
securityContext:
${indent(4, security_context)}
volumes:
- name: ray-logs
emptyDir: {}
Expand Down
8 changes: 8 additions & 0 deletions modules/kuberay-cluster/kuberay.tf
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ resource "google_storage_bucket_iam_member" "gcs-bucket-iam" {
member = "serviceAccount:${var.google_service_account}@${var.project_id}.iam.gserviceaccount.com"
}

locals {
  # YAML rendering of var.security_context, handed to the values templates as
  # `security_context`. Only top-level attributes whose value is null are
  # dropped; nested objects are encoded as they are. chomp() removes the
  # trailing newline that yamlencode() appends.
  security_context = chomp(yamlencode({
    for attr, setting in var.security_context :
    attr => setting
    if setting != null
  }))
}

resource "helm_release" "ray-cluster" {
name = "example-cluster"
repository = "https://ray-project.github.io/kuberay-helm/"
Expand All @@ -30,27 +34,31 @@ resource "helm_release" "ray-cluster" {
gcs_bucket = var.gcs_bucket
k8s_service_account = var.google_service_account
grafana_host = var.grafana_host
security_context = local.security_context
secret_name = var.db_secret_name
project_id = var.project_id
db_region = var.db_region
}) : var.enable_tpu ? templatefile("${path.module}/kuberay-tpu-values.yaml", {
gcs_bucket = var.gcs_bucket
k8s_service_account = var.google_service_account
grafana_host = var.grafana_host
security_context = local.security_context
secret_name = var.db_secret_name
project_id = var.project_id
db_region = var.db_region
}) : var.enable_gpu ? templatefile("${path.module}/kuberay-gpu-values.yaml", {
gcs_bucket = var.gcs_bucket
k8s_service_account = var.google_service_account
grafana_host = var.grafana_host
security_context = local.security_context
secret_name = var.db_secret_name
project_id = var.project_id
db_region = var.db_region
}) : templatefile("${path.module}/kuberay-values.yaml", {
gcs_bucket = var.gcs_bucket
k8s_service_account = var.google_service_account
grafana_host = var.grafana_host
security_context = local.security_context
secret_name = var.db_secret_name
project_id = var.project_id
db_region = var.db_region
Expand Down
54 changes: 53 additions & 1 deletion modules/kuberay-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,60 @@ variable "grafana_host" {
type = string
}

# Container security context rendered into the Ray cluster Helm values.
#
# Rarely used nested attributes are commented out below because null values
# inside nested objects are harder to strip from the YAML encoding (only
# top-level nulls are removed). Add them back if nested nulls can be stripped
# easily.
variable "security_context" {
  description = "Kubernetes security context to set on all ray cluster pods"
  type = object({
    allowPrivilegeEscalation = optional(bool)
    capabilities = optional(object({
      # Not typically used
      # add = optional(list(string))
      drop = optional(list(string))
    }))
    privileged             = optional(bool)
    procMount              = optional(string)
    readOnlyRootFilesystem = optional(bool)
    runAsGroup             = optional(number)
    runAsNonRoot           = optional(bool)
    runAsUser              = optional(number)
    seLinuxOptions = optional(object({
      level = optional(string)
      role  = optional(string)
      type  = optional(string)
      user  = optional(string)
    }))
    seccompProfile = optional(object({
      # Not typically used
      # localhostProfile = optional(string)
      type = optional(string)
    }))
    # Every field is optional, matching the other nested objects, so callers
    # can set any subset of the Windows options.
    windowsOptions = optional(object({
      gmsaCredentialSpec     = optional(string)
      gmsaCredentialSpecName = optional(string)
      hostProcess            = optional(bool)
      runAsUserName          = optional(string)
    }))
  })

  default = {
    allowPrivilegeEscalation = false
    capabilities = {
      drop = ["ALL"]
    }
    # GKE will automatically mount GPUs and TPUs into unprivileged pods
    privileged = false
    # The key must match the attribute name declared in the type above;
    # attributes that are not part of the object type are discarded during
    # type conversion and would never reach the rendered YAML.
    # NOTE(review): with the key spelled correctly this setting now takes
    # effect - confirm every path the Ray containers write to is backed by a
    # mounted volume.
    readOnlyRootFilesystem = true
    runAsNonRoot           = true
    seccompProfile = {
      type = "RuntimeDefault"
    }
  }
}

# Name passed to the Helm values templates as `secret_name`.
variable "db_secret_name" {
  type        = string
  description = "CloudSQL user credentials"
  default     = "empty-secret"
}

0 comments on commit 2871e74

Please sign in to comment.