diff --git a/ray-operator/config/samples/ray-cluster.external-redis-uri.yaml b/ray-operator/config/samples/ray-cluster.external-redis-uri.yaml new file mode 100644 index 00000000000..54fb52c0375 --- /dev/null +++ b/ray-operator/config/samples/ray-cluster.external-redis-uri.yaml @@ -0,0 +1,198 @@ +kind: ConfigMap +apiVersion: v1 +metadata: + name: redis-config + labels: + app: redis +data: + redis.conf: |- + dir /data + port 6379 + bind 0.0.0.0 + appendonly yes + protected-mode no + requirepass 5241590000000000 + pidfile /data/redis-6379.pid +--- +apiVersion: v1 +kind: Service +metadata: + name: redis + labels: + app: redis +spec: + type: ClusterIP + ports: + - name: redis + port: 6379 + selector: + app: redis +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: redis + labels: + app: redis +spec: + replicas: 1 + selector: + matchLabels: + app: redis + template: + metadata: + labels: + app: redis + spec: + containers: + - name: redis + image: redis:5.0.8 + command: + - "sh" + - "-c" + - "redis-server /usr/local/etc/redis/redis.conf" + ports: + - containerPort: 6379 + volumeMounts: + - name: config + mountPath: /usr/local/etc/redis/redis.conf + subPath: redis.conf + volumes: + - name: config + configMap: + name: redis-config +--- +# Redis password +apiVersion: v1 +kind: Secret +metadata: + name: redis-password-secret +type: Opaque +data: + # echo -n "5241590000000000" | base64 + password: NTI0MTU5MDAwMDAwMDAwMA== +--- +apiVersion: ray.io/v1 +kind: RayCluster +metadata: + annotations: + ray.io/ft-enabled: "true" # enable Ray GCS FT + # In most cases, you don't need to set `ray.io/external-storage-namespace` because KubeRay will + # automatically set it to the UID of RayCluster. Only modify this annotation if you fully understand + # the behaviors of the Ray GCS FT and RayService to avoid misconfiguration. 
+ # [Example]: + # ray.io/external-storage-namespace: "my-raycluster-storage" + name: raycluster-external-redis-uri +spec: + rayVersion: '2.7.0' + headGroupSpec: + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. + rayStartParams: + # Setting "num-cpus: 0" to avoid any Ray actors or tasks being scheduled on the Ray head Pod. + num-cpus: "0" + # redis-password should match "requirepass" in redis.conf in the ConfigMap above. + # Ray 2.3.0 changes the default redis password from "5241590000000000" to "". + redis-password: $REDIS_PASSWORD + # Pod template + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.7.0 + resources: + limits: + cpu: "1" + requests: + cpu: "1" + env: + # Ray will read the RAY_REDIS_ADDRESS environment variable to establish + # a connection with the Redis server. In this instance, we use the "redis" + # Kubernetes ClusterIP service name, also created by this YAML, as the + # connection point to the Redis server. + - name: RAY_REDIS_ADDRESS + value: redis://redis:6379 + # This environment variable is used in the `rayStartParams` above. 
+ - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: redis-password-secret + key: password + ports: + - containerPort: 6379 + name: redis + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + - mountPath: /home/ray/samples + name: ray-example-uri-configmap + volumes: + - name: ray-logs + emptyDir: {} + - name: ray-example-uri-configmap + configMap: + name: ray-example-uri + defaultMode: 0777 + items: + - key: detached_actor.py + path: detached_actor.py + - key: increment_counter.py + path: increment_counter.py + workerGroupSpecs: + # The number of Pod replicas in this worker group. + - replicas: 1 + minReplicas: 1 + maxReplicas: 10 + groupName: small-group + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. 
+ rayStartParams: {} + # Pod template + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.7.0 + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + resources: + limits: + cpu: "1" + requests: + cpu: "1" + volumes: + - name: ray-logs + emptyDir: {} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: ray-example-uri +data: + detached_actor.py: | + import ray + + @ray.remote(num_cpus=1) + class Counter: + def __init__(self): + self.value = 0 + + def increment(self): + self.value += 1 + return self.value + + ray.init(namespace="default_namespace") + Counter.options(name="counter_actor", lifetime="detached").remote() + increment_counter.py: | + import ray + + ray.init(namespace="default_namespace") + counter = ray.get_actor("counter_actor") + print(ray.get(counter.increment.remote())) diff --git a/ray-operator/controllers/ray/raycluster_controller.go b/ray-operator/controllers/ray/raycluster_controller.go index bacb29da61d..12efc3cd020 100644 --- a/ray-operator/controllers/ray/raycluster_controller.go +++ b/ray-operator/controllers/ray/raycluster_controller.go @@ -1040,10 +1040,13 @@ func (r *RayClusterReconciler) buildRedisCleanupJob(instance rayv1.RayCluster) b pod.Spec.Containers[common.RayContainerIndex].Args = []string{ "python -c " + "\"from ray._private.gcs_utils import cleanup_redis_storage; " + + "from urllib.parse import urlparse; " + "import os; " + "import sys; " + - "host, port = os.getenv('RAY_REDIS_ADDRESS').rsplit(':'); " + - "sys.exit(1) if not cleanup_redis_storage(host=host, port=int(port), password=os.getenv('REDIS_PASSWORD'), use_ssl=False, storage_namespace=os.getenv('RAY_external_storage_namespace')) else None\"", + "redis_address = os.getenv('RAY_REDIS_ADDRESS', '').split(',')[0]; " + + "redis_address = redis_address if '://' in redis_address else 'redis://' + redis_address; " + + "parsed = urlparse(redis_address); " + + "sys.exit(1) if not cleanup_redis_storage(host=parsed.hostname, port=parsed.port, 
password=os.getenv('REDIS_PASSWORD', parsed.password), use_ssl=parsed.scheme=='rediss', storage_namespace=os.getenv('RAY_external_storage_namespace')) else None\"", } // Disable liveness and readiness probes because the Job will not launch processes like Raylet and GCS. pod.Spec.Containers[common.RayContainerIndex].LivenessProbe = nil