From 3f5ebbceb388a17a6f9a0796b5e1c62bd8460e29 Mon Sep 17 00:00:00 2001 From: Artem Minyaylov Date: Wed, 6 Mar 2024 07:04:51 +0000 Subject: [PATCH] Upgrade ray version; shrink worker resource allocation --- .../kuberay-autopilot-values.yaml | 36 +++++++++---------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/modules/kuberay-cluster/kuberay-autopilot-values.yaml b/modules/kuberay-cluster/kuberay-autopilot-values.yaml index 11a20a90e..668be9bd2 100644 --- a/modules/kuberay-cluster/kuberay-autopilot-values.yaml +++ b/modules/kuberay-cluster/kuberay-autopilot-values.yaml @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,7 +22,7 @@ image: # Replace this with your own image if needed. repository: rayproject/ray - tag: 2.6.1-py310-gpu + tag: 2.9.3-py310-gpu pullPolicy: IfNotPresent nameOverride: "kuberay" @@ -64,8 +64,6 @@ head: # containerEnv specifies environment variables for the Ray container, # Follows standard K8s container env schema. containerEnv: - # - name: EXAMPLE_ENV - # value: "1" - name: RAY_memory_monitor_refresh_ms value: "0" - name: RAY_GRAFANA_IFRAME_HOST @@ -90,18 +88,18 @@ head: # for further guidance. resources: limits: - cpu: "8" + cpu: "1" # To avoid out-of-memory issues, never allocate less than 2G memory for the Ray head. - memory: "20G" + memory: "8G" ephemeral-storage: 20Gi requests: - cpu: "8" - memory: "20G" + cpu: "1" + memory: "8G" ephemeral-storage: 20Gi annotations: gke-gcsfuse/volumes: "true" - gke-gcsfuse/cpu-limit: "2" - gke-gcsfuse/memory-limit: 20Gi + gke-gcsfuse/cpu-limit: "1" + gke-gcsfuse/memory-limit: 2Gi gke-gcsfuse/ephemeral-storage-limit: 20Gi nodeSelector: cloud.google.com/compute-class: "Performance" @@ -158,8 +156,6 @@ worker: disabled: true # The map's key is used as the groupName. -# For example, key:small-group in the map below -# will be used as the groupName additionalWorkerGroups: cpuGroup: # Disabled by default @@ -194,16 +190,16 @@ additionalWorkerGroups: resources: limits: cpu: 4 - memory: "20G" + memory: "16G" ephemeral-storage: 20Gi requests: cpu: 4 - memory: "20G" + memory: "16G" ephemeral-storage: 20Gi annotations: gke-gcsfuse/volumes: "true" gke-gcsfuse/cpu-limit: "2" - gke-gcsfuse/memory-limit: 20Gi + gke-gcsfuse/memory-limit: 10Gi gke-gcsfuse/ephemeral-storage-limit: 20Gi nodeSelector: cloud.google.com/compute-class: "Performance" @@ -287,19 +283,19 @@ additionalWorkerGroups: # for further guidance. resources: limits: - cpu: "8" + cpu: "4" nvidia.com/gpu: "2" - memory: "40G" + memory: "16G" ephemeral-storage: 20Gi requests: - cpu: "8" + cpu: "4" nvidia.com/gpu: "2" - memory: "40G" + memory: "16G" ephemeral-storage: 20Gi annotations: gke-gcsfuse/volumes: "true" gke-gcsfuse/cpu-limit: "2" - gke-gcsfuse/memory-limit: 20Gi + gke-gcsfuse/memory-limit: 10Gi gke-gcsfuse/ephemeral-storage-limit: 20Gi nodeSelector: cloud.google.com/compute-class: "Accelerator"