Skip to content

Commit

Permalink
Upgrade ray version; shrink worker resource allocation
Browse files Browse the repository at this point in the history
  • Loading branch information
artemvmin committed Mar 6, 2024
1 parent 5b980da commit be005e0
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@
"id": "7ba6c3ff-a25a-4f4d-b58e-68f7fe7d33df",
"metadata": {},
"outputs": [],
"source": [
"source": [
"job_id = client.submit_job(\n",
" entrypoint=\"python test.py\",\n",
" # Path to the local directory that contains the entrypoint file.\n",
Expand All @@ -278,10 +278,9 @@
" status = client.get_job_status(job_id)\n",
" if status != prev_status:\n",
" print(\"Job status:\", status)\n",
" print(\"Job info:\", client.get_job_info(job_id).message)\n",
" prev_status = status\n",
" if status.is_terminal():\n",
" if status == 'FAILED':\n",
" print(\"Job info:\", client.get_job_info(job_id))\n",
" break\n",
" time.sleep(5)\n"
]
Expand Down
36 changes: 16 additions & 20 deletions modules/kuberay-cluster/kuberay-autopilot-values.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2023 Google LLC
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -22,7 +22,7 @@
image:
# Replace this with your own image if needed.
repository: rayproject/ray
tag: 2.6.1-py310-gpu
tag: 2.7.1-py310-gpu
pullPolicy: IfNotPresent

nameOverride: "kuberay"
Expand Down Expand Up @@ -64,8 +64,6 @@ head:
# containerEnv specifies environment variables for the Ray container.
# It follows the standard K8s container env schema.
containerEnv:
# - name: EXAMPLE_ENV
# value: "1"
- name: RAY_memory_monitor_refresh_ms
value: "0"
- name: RAY_GRAFANA_IFRAME_HOST
Expand All @@ -90,18 +88,18 @@ head:
# for further guidance.
resources:
limits:
cpu: "8"
cpu: "1"
# To avoid out-of-memory issues, never allocate less than 2G memory for the Ray head.
memory: "20G"
memory: "8G"
ephemeral-storage: 20Gi
requests:
cpu: "8"
memory: "20G"
cpu: "1"
memory: "8G"
ephemeral-storage: 20Gi
annotations:
gke-gcsfuse/volumes: "true"
gke-gcsfuse/cpu-limit: "2"
gke-gcsfuse/memory-limit: 20Gi
gke-gcsfuse/cpu-limit: "1"
gke-gcsfuse/memory-limit: 4Gi
gke-gcsfuse/ephemeral-storage-limit: 20Gi
nodeSelector:
cloud.google.com/compute-class: "Performance"
Expand Down Expand Up @@ -158,8 +156,6 @@ worker:
disabled: true

# The map's key is used as the groupName.
# For example, key:small-group in the map below
# will be used as the groupName
additionalWorkerGroups:
cpuGroup:
# Disabled by default
Expand Down Expand Up @@ -194,16 +190,16 @@ additionalWorkerGroups:
resources:
limits:
cpu: 4
memory: "20G"
memory: "16G"
ephemeral-storage: 20Gi
requests:
cpu: 4
memory: "20G"
memory: "16G"
ephemeral-storage: 20Gi
annotations:
gke-gcsfuse/volumes: "true"
gke-gcsfuse/cpu-limit: "2"
gke-gcsfuse/memory-limit: 20Gi
gke-gcsfuse/memory-limit: 8Gi
gke-gcsfuse/ephemeral-storage-limit: 20Gi
nodeSelector:
cloud.google.com/compute-class: "Performance"
Expand Down Expand Up @@ -287,19 +283,19 @@ additionalWorkerGroups:
# for further guidance.
resources:
limits:
cpu: "8"
cpu: "4"
nvidia.com/gpu: "2"
memory: "40G"
memory: "16G"
ephemeral-storage: 20Gi
requests:
cpu: "8"
cpu: "4"
nvidia.com/gpu: "2"
memory: "40G"
memory: "16G"
ephemeral-storage: 20Gi
annotations:
gke-gcsfuse/volumes: "true"
gke-gcsfuse/cpu-limit: "2"
gke-gcsfuse/memory-limit: 20Gi
gke-gcsfuse/memory-limit: 8Gi
gke-gcsfuse/ephemeral-storage-limit: 20Gi
nodeSelector:
cloud.google.com/compute-class: "Accelerator"
Expand Down

0 comments on commit be005e0

Please sign in to comment.