Skip to content

Commit

Permalink
[Bug][RayCluster] Fix RAY_REDIS_ADDRESS parsing with redis scheme and multiple addresses
Browse files (browse the repository at this point in the history)
  • Loading branch information
rueian committed Oct 28, 2023
1 parent ac56e33 commit a36a45c
Show file tree
Hide file tree
Showing 2 changed files with 203 additions and 2 deletions.
198 changes: 198 additions & 0 deletions ray-operator/config/samples/ray-cluster.external-redis-uri.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
# Redis server configuration. The `redis.conf` key is mounted into the Redis
# Deployment's container at /usr/local/etc/redis/redis.conf.
kind: ConfigMap
apiVersion: v1
metadata:
  name: redis-config
  labels:
    app: redis
data:
  # `requirepass` must match the password stored in `redis-password-secret`,
  # which the Ray head Pod passes to `ray start` as `redis-password`.
  redis.conf: |-
    dir /data
    port 6379
    bind 0.0.0.0
    appendonly yes
    protected-mode no
    requirepass 5241590000000000
    pidfile /data/redis-6379.pid
---
# ClusterIP Service that exposes the Pods labelled `app: redis` on port 6379.
# The Ray head Pod reaches Redis through this Service name
# (RAY_REDIS_ADDRESS=redis://redis:6379).
apiVersion: v1
kind: Service
metadata:
  name: redis
  labels:
    app: redis
spec:
  type: ClusterIP
  ports:
    - name: redis
      port: 6379
  selector:
    app: redis
---
# Single-replica Redis server used as the external storage for Ray GCS
# fault tolerance in this sample.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis
  labels:
    app: redis
spec:
  replicas: 1
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      containers:
        - name: redis
          image: redis:5.0.8
          # Start Redis with the configuration file mounted from the
          # `redis-config` ConfigMap below.
          command:
            - "sh"
            - "-c"
            - "redis-server /usr/local/etc/redis/redis.conf"
          ports:
            - containerPort: 6379
          volumeMounts:
            # `subPath` mounts only the `redis.conf` key as a single file
            # instead of replacing the whole directory.
            - name: config
              mountPath: /usr/local/etc/redis/redis.conf
              subPath: redis.conf
      volumes:
        - name: config
          configMap:
            name: redis-config
---
# Redis password, consumed by the Ray head Pod through the REDIS_PASSWORD
# environment variable.
apiVersion: v1
kind: Secret
metadata:
  name: redis-password-secret
type: Opaque
data:
  # Base64-encoded value; generated with:
  # echo -n "5241590000000000" | base64
  password: NTI0MTU5MDAwMDAwMDAwMA==
---
# RayCluster with GCS fault tolerance enabled, connecting to the external Redis
# defined in this file through a URI-style RAY_REDIS_ADDRESS (redis://host:port).
apiVersion: ray.io/v1
kind: RayCluster
metadata:
  annotations:
    ray.io/ft-enabled: "true"  # enable Ray GCS FT
    # In most cases, you don't need to set `ray.io/external-storage-namespace` because KubeRay will
    # automatically set it to the UID of RayCluster. Only modify this annotation if you fully understand
    # the behaviors of the Ray GCS FT and RayService to avoid misconfiguration.
    # [Example]:
    # ray.io/external-storage-namespace: "my-raycluster-storage"
  name: raycluster-external-redis-uri
spec:
  rayVersion: '2.7.0'
  headGroupSpec:
    # The `rayStartParams` are used to configure the `ray start` command.
    # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay.
    # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`.
    rayStartParams:
      # Setting "num-cpus: 0" to avoid any Ray actors or tasks being scheduled on the Ray head Pod.
      num-cpus: "0"
      # redis-password should match "requirepass" in redis.conf in the ConfigMap above.
      # Ray 2.3.0 changes the default redis password from "5241590000000000" to "".
      redis-password: $REDIS_PASSWORD
    # Pod template
    template:
      spec:
        containers:
          - name: ray-head
            image: rayproject/ray:2.7.0
            resources:
              limits:
                cpu: "1"
              requests:
                cpu: "1"
            env:
              # Ray will read the RAY_REDIS_ADDRESS environment variable to establish
              # a connection with the Redis server. In this instance, we use the "redis"
              # Kubernetes ClusterIP service name, also created by this YAML, as the
              # connection point to the Redis server.
              - name: RAY_REDIS_ADDRESS
                value: redis://redis:6379
              # This environment variable is used in the `rayStartParams` above.
              - name: REDIS_PASSWORD
                valueFrom:
                  secretKeyRef:
                    name: redis-password-secret
                    key: password
            ports:
              - containerPort: 6379
                name: redis
              - containerPort: 8265
                name: dashboard
              - containerPort: 10001
                name: client
            volumeMounts:
              - mountPath: /tmp/ray
                name: ray-logs
              - mountPath: /home/ray/samples
                name: ray-example-uri-configmap
        volumes:
          - name: ray-logs
            emptyDir: {}
          - name: ray-example-uri-configmap
            configMap:
              name: ray-example-uri
              # Octal file mode (rwxrwxrwx). Keep it an unquoted integer:
              # the field is numeric, so quoting it would make it a string.
              defaultMode: 0777
              items:
                - key: detached_actor.py
                  path: detached_actor.py
                - key: increment_counter.py
                  path: increment_counter.py
  workerGroupSpecs:
    # the pod replicas in this group typed worker
    - replicas: 1
      minReplicas: 1
      maxReplicas: 10
      groupName: small-group
      # The `rayStartParams` are used to configure the `ray start` command.
      # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay.
      # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`.
      rayStartParams: {}
      # Pod template
      template:
        spec:
          containers:
            - name: ray-worker
              image: rayproject/ray:2.7.0
              volumeMounts:
                - mountPath: /tmp/ray
                  name: ray-logs
              resources:
                limits:
                  cpu: "1"
                requests:
                  cpu: "1"
          volumes:
            - name: ray-logs
              emptyDir: {}
---
# Example Ray scripts mounted into the head Pod at /home/ray/samples.
apiVersion: v1
kind: ConfigMap
metadata:
  name: ray-example-uri
data:
  # Creates a detached actor named "counter_actor" in "default_namespace".
  detached_actor.py: |
    import ray
    @ray.remote(num_cpus=1)
    class Counter:
        def __init__(self):
            self.value = 0
        def increment(self):
            self.value += 1
            return self.value
    ray.init(namespace="default_namespace")
    Counter.options(name="counter_actor", lifetime="detached").remote()
  # Looks up "counter_actor" by name, increments it once and prints the result.
  increment_counter.py: |
    import ray
    ray.init(namespace="default_namespace")
    counter = ray.get_actor("counter_actor")
    print(ray.get(counter.increment.remote()))
7 changes: 5 additions & 2 deletions ray-operator/controllers/ray/raycluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1040,10 +1040,13 @@ func (r *RayClusterReconciler) buildRedisCleanupJob(instance rayv1.RayCluster) b
pod.Spec.Containers[common.RayContainerIndex].Args = []string{
"python -c " +
"\"from ray._private.gcs_utils import cleanup_redis_storage; " +
"from urllib.parse import urlparse; " +
"import os; " +
"import sys; " +
"host, port = os.getenv('RAY_REDIS_ADDRESS').rsplit(':'); " +
"sys.exit(1) if not cleanup_redis_storage(host=host, port=int(port), password=os.getenv('REDIS_PASSWORD'), use_ssl=False, storage_namespace=os.getenv('RAY_external_storage_namespace')) else None\"",
"redis_address = os.getenv('RAY_REDIS_ADDRESS', '').split(',')[0]; " +
"redis_address = redis_address if '://' in redis_address else 'redis://' + redis_address; " +
"parsed = urlparse(redis_address); " +
"sys.exit(1) if not cleanup_redis_storage(host=parsed.hostname, port=parsed.port, password=os.getenv('REDIS_PASSWORD', parsed.password), use_ssl=parsed.scheme=='rediss', storage_namespace=os.getenv('RAY_external_storage_namespace')) else None\"",
}
// Disable liveness and readiness probes because the Job will not launch processes like Raylet and GCS.
pod.Spec.Containers[common.RayContainerIndex].LivenessProbe = nil
Expand Down

0 comments on commit a36a45c

Please sign in to comment.