From be005e02f008fd525dac90f009bc80d6b4cbdb22 Mon Sep 17 00:00:00 2001
From: Artem Minyaylov <artemvmin@google.com>
Date: Wed, 6 Mar 2024 07:04:51 +0000
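Subject: [PATCH] Upgrade Ray to 2.7.1; shrink head and worker resources

- Bump the rayproject/ray image tag from 2.6.1-py310-gpu to
  2.7.1-py310-gpu.
- Reduce the CPU and memory requests/limits of the Ray head, and the
  memory (and, for the GPU-enabled group, CPU) of the additional worker
  groups.
- Lower the gke-gcsfuse CPU/memory limit annotations on the head and the
  memory limit annotation on the worker groups.
- Notebook: print the job info message on every status change instead of
  printing the full job info only when the job has failed.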
---
 .../rag-kaggle-ray-sql-latest.ipynb           |  5 ++-
 .../kuberay-autopilot-values.yaml             | 36 +++++++++----------
 2 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb b/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb
index b0a769af1..3570e7db3 100644
--- a/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb
+++ b/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb
@@ -252,7 +252,7 @@
    "id": "7ba6c3ff-a25a-4f4d-b58e-68f7fe7d33df",
    "metadata": {},
    "outputs": [],
-   "source": [
+   "source": [
     "job_id = client.submit_job(\n",
     "    entrypoint=\"python test.py\",\n",
     "    # Path to the local directory that contains the entrypoint file.\n",
@@ -278,10 +278,9 @@
     "    status = client.get_job_status(job_id)\n",
     "    if status != prev_status:\n",
     "        print(\"Job status:\", status)\n",
+    "        print(\"Job info:\", client.get_job_info(job_id).message)\n",
     "        prev_status = status\n",
     "    if status.is_terminal():\n",
-    "        if status == 'FAILED':\n",
-    "            print(\"Job info:\", client.get_job_info(job_id))\n",
     "        break\n",
     "    time.sleep(5)\n"
    ]
diff --git a/modules/kuberay-cluster/kuberay-autopilot-values.yaml b/modules/kuberay-cluster/kuberay-autopilot-values.yaml
index 11a20a90e..410cb30b8 100644
--- a/modules/kuberay-cluster/kuberay-autopilot-values.yaml
+++ b/modules/kuberay-cluster/kuberay-autopilot-values.yaml
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC
+# Copyright 2024 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@
 image:
   # Replace this with your own image if needed.
   repository: rayproject/ray
-  tag: 2.6.1-py310-gpu
+  tag: 2.7.1-py310-gpu
   pullPolicy: IfNotPresent
 
 nameOverride: "kuberay"
@@ -64,8 +64,6 @@ head:
   # containerEnv specifies environment variables for the Ray container,
   # Follows standard K8s container env schema.
   containerEnv:
-  # - name: EXAMPLE_ENV
-  #   value: "1"
     - name: RAY_memory_monitor_refresh_ms
       value: "0"
     - name: RAY_GRAFANA_IFRAME_HOST
@@ -90,18 +88,18 @@ head:
   # for further guidance.
   resources:
     limits:
-      cpu: "8"
+      cpu: "1"
       # To avoid out-of-memory issues, never allocate less than 2G memory for the Ray head.
-      memory: "20G"
+      memory: "8G"
       ephemeral-storage: 20Gi
     requests:
-      cpu: "8"
-      memory: "20G"
+      cpu: "1"
+      memory: "8G"
       ephemeral-storage: 20Gi
   annotations:
     gke-gcsfuse/volumes: "true"
-    gke-gcsfuse/cpu-limit: "2"
-    gke-gcsfuse/memory-limit: 20Gi
+    gke-gcsfuse/cpu-limit: "1"
+    gke-gcsfuse/memory-limit: 4Gi
     gke-gcsfuse/ephemeral-storage-limit: 20Gi
   nodeSelector:
     cloud.google.com/compute-class: "Performance"
@@ -158,8 +156,6 @@ worker:
   disabled: true
 
 # The map's key is used as the groupName.
-# For example, key:small-group in the map below
-# will be used as the groupName
 additionalWorkerGroups:
   cpuGroup:
     # Disabled by default
@@ -194,16 +190,16 @@ additionalWorkerGroups:
     resources:
       limits:
         cpu: 4
-        memory: "20G"
+        memory: "16G"
         ephemeral-storage: 20Gi
       requests:
         cpu: 4
-        memory: "20G"
+        memory: "16G"
         ephemeral-storage: 20Gi
     annotations:
       gke-gcsfuse/volumes: "true"
       gke-gcsfuse/cpu-limit: "2"
-      gke-gcsfuse/memory-limit: 20Gi
+      gke-gcsfuse/memory-limit: 8Gi
       gke-gcsfuse/ephemeral-storage-limit: 20Gi
     nodeSelector:
       cloud.google.com/compute-class: "Performance"
@@ -287,19 +283,19 @@ additionalWorkerGroups:
   # for further guidance.
     resources:
       limits:
-        cpu: "8"
+        cpu: "4"
         nvidia.com/gpu: "2"
-        memory: "40G"
+        memory: "16G"
         ephemeral-storage: 20Gi
       requests:
-        cpu: "8"
+        cpu: "4"
         nvidia.com/gpu: "2"
-        memory: "40G"
+        memory: "16G"
         ephemeral-storage: 20Gi
     annotations:
       gke-gcsfuse/volumes: "true"
       gke-gcsfuse/cpu-limit: "2"
-      gke-gcsfuse/memory-limit: 20Gi
+      gke-gcsfuse/memory-limit: 8Gi
       gke-gcsfuse/ephemeral-storage-limit: 20Gi
     nodeSelector:
       cloud.google.com/compute-class: "Accelerator"