From 3f5ebbceb388a17a6f9a0796b5e1c62bd8460e29 Mon Sep 17 00:00:00 2001
From: Artem Minyaylov <artemvmin@google.com>
Date: Wed, 6 Mar 2024 07:04:51 +0000
Subject: [PATCH] Upgrade Ray to 2.9.3; shrink head and worker resource allocations

---
 .../kuberay-autopilot-values.yaml             | 36 +++++++++----------
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/modules/kuberay-cluster/kuberay-autopilot-values.yaml b/modules/kuberay-cluster/kuberay-autopilot-values.yaml
index 11a20a90e..668be9bd2 100644
--- a/modules/kuberay-cluster/kuberay-autopilot-values.yaml
+++ b/modules/kuberay-cluster/kuberay-autopilot-values.yaml
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC
+# Copyright 2024 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@
 image:
   # Replace this with your own image if needed.
   repository: rayproject/ray
-  tag: 2.6.1-py310-gpu
+  tag: 2.9.3-py310-gpu
   pullPolicy: IfNotPresent
 
 nameOverride: "kuberay"
@@ -64,8 +64,6 @@ head:
   # containerEnv specifies environment variables for the Ray container,
   # Follows standard K8s container env schema.
   containerEnv:
-  # - name: EXAMPLE_ENV
-  #   value: "1"
     - name: RAY_memory_monitor_refresh_ms
       value: "0"
     - name: RAY_GRAFANA_IFRAME_HOST
@@ -90,18 +88,18 @@ head:
   # for further guidance.
   resources:
     limits:
-      cpu: "8"
+      cpu: "1"
       # To avoid out-of-memory issues, never allocate less than 2G memory for the Ray head.
-      memory: "20G"
+      memory: "8G"
       ephemeral-storage: 20Gi
     requests:
-      cpu: "8"
-      memory: "20G"
+      cpu: "1"
+      memory: "8G"
       ephemeral-storage: 20Gi
   annotations:
     gke-gcsfuse/volumes: "true"
-    gke-gcsfuse/cpu-limit: "2"
-    gke-gcsfuse/memory-limit: 20Gi
+    gke-gcsfuse/cpu-limit: "1"
+    gke-gcsfuse/memory-limit: 2Gi
     gke-gcsfuse/ephemeral-storage-limit: 20Gi
   nodeSelector:
     cloud.google.com/compute-class: "Performance"
@@ -158,8 +156,6 @@ worker:
   disabled: true
 
 # The map's key is used as the groupName.
-# For example, key:small-group in the map below
-# will be used as the groupName
 additionalWorkerGroups:
   cpuGroup:
     # Disabled by default
@@ -194,16 +190,16 @@ additionalWorkerGroups:
     resources:
       limits:
         cpu: 4
-        memory: "20G"
+        memory: "16G"
         ephemeral-storage: 20Gi
       requests:
         cpu: 4
-        memory: "20G"
+        memory: "16G"
         ephemeral-storage: 20Gi
     annotations:
       gke-gcsfuse/volumes: "true"
       gke-gcsfuse/cpu-limit: "2"
-      gke-gcsfuse/memory-limit: 20Gi
+      gke-gcsfuse/memory-limit: 10Gi
       gke-gcsfuse/ephemeral-storage-limit: 20Gi
     nodeSelector:
       cloud.google.com/compute-class: "Performance"
@@ -287,19 +283,19 @@ additionalWorkerGroups:
   # for further guidance.
     resources:
       limits:
-        cpu: "8"
+        cpu: "4"
         nvidia.com/gpu: "2"
-        memory: "40G"
+        memory: "16G"
         ephemeral-storage: 20Gi
       requests:
-        cpu: "8"
+        cpu: "4"
         nvidia.com/gpu: "2"
-        memory: "40G"
+        memory: "16G"
         ephemeral-storage: 20Gi
     annotations:
       gke-gcsfuse/volumes: "true"
       gke-gcsfuse/cpu-limit: "2"
-      gke-gcsfuse/memory-limit: 20Gi
+      gke-gcsfuse/memory-limit: 10Gi
       gke-gcsfuse/ephemeral-storage-limit: 20Gi
     nodeSelector:
       cloud.google.com/compute-class: "Accelerator"