From 96d1ac23893171f96b20bc8af0a3127355bfe899 Mon Sep 17 00:00:00 2001 From: Kai-Hsun Chen Date: Wed, 25 Oct 2023 11:42:57 -0700 Subject: [PATCH] [Feature] Add an example for RayService high availability (#1566) --- .../ray-service.high-availability.yaml | 234 ++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 ray-operator/config/samples/ray-service.high-availability.yaml diff --git a/ray-operator/config/samples/ray-service.high-availability.yaml b/ray-operator/config/samples/ray-service.high-availability.yaml new file mode 100644 index 0000000000..9e7d2bae34 --- /dev/null +++ b/ray-operator/config/samples/ray-service.high-availability.yaml @@ -0,0 +1,234 @@ +kind: ConfigMap +apiVersion: v1 +metadata: + name: redis-config + labels: + app: redis +data: + redis.conf: |- + dir /data + port 6379 + bind 0.0.0.0 + appendonly yes + protected-mode no + requirepass 5241590000000000 + pidfile /data/redis-6379.pid +--- +apiVersion: v1 +kind: Service +metadata: + name: redis + labels: + app: redis +spec: + type: ClusterIP + ports: + - name: redis + port: 6379 + selector: + app: redis +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: redis + labels: + app: redis +spec: + replicas: 1 + selector: + matchLabels: + app: redis + template: + metadata: + labels: + app: redis + spec: + containers: + - name: redis + image: redis:5.0.8 + command: + - "sh" + - "-c" + - "redis-server /usr/local/etc/redis/redis.conf" + ports: + - containerPort: 6379 + volumeMounts: + - name: config + mountPath: /usr/local/etc/redis/redis.conf + subPath: redis.conf + volumes: + - name: config + configMap: + name: redis-config +--- +# Redis password +apiVersion: v1 +kind: Secret +metadata: + name: redis-password-secret +type: Opaque +data: + # echo -n "5241590000000000" | base64 + password: NTI0MTU5MDAwMDAwMDAwMA== +--- +apiVersion: ray.io/v1 +kind: RayService +metadata: + name: rayservice-ha + annotations: + ray.io/ft-enabled: "true" +spec: + serviceUnhealthySecondThreshold: 900 # Config for the health check threshold for Ray Serve applications. Default value is 900. + deploymentUnhealthySecondThreshold: 300 # Config for the health check threshold for Ray dashboard agent. Default value is 300. + serveConfigV2: | + applications: + - name: fruit_app + import_path: fruit.deployment_graph + route_prefix: /fruit + runtime_env: + working_dir: "https://github.com/ray-project/test_dag/archive/41d09119cbdf8450599f993f51318e9e27c59098.zip" + deployments: + - name: MangoStand + num_replicas: 2 + max_replicas_per_node: 1 + user_config: + price: 3 + ray_actor_options: + num_cpus: 0.1 + - name: OrangeStand + num_replicas: 2 + max_replicas_per_node: 1 + user_config: + price: 2 + ray_actor_options: + num_cpus: 0.1 + - name: PearStand + num_replicas: 2 + max_replicas_per_node: 1 + user_config: + price: 1 + ray_actor_options: + num_cpus: 0.1 + - name: FruitMarket + num_replicas: 2 + max_replicas_per_node: 1 + ray_actor_options: + num_cpus: 0.1 + - name: DAGDriver + num_replicas: 2 + max_replicas_per_node: 1 + ray_actor_options: + num_cpus: 0.1 + rayClusterConfig: + rayVersion: '2.7.0' # should match the Ray version in the image of the containers + ######################headGroupSpecs################################# + # Ray head pod template. + headGroupSpec: + rayStartParams: + num-cpus: "0" + redis-password: $REDIS_PASSWORD + #pod template + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.7.0 + env: + - name: RAY_REDIS_ADDRESS + value: redis:6379 + # This environment variable is used in the `rayStartParams` above. + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: redis-password-secret + key: password + resources: + limits: + cpu: 1 + memory: 1Gi + requests: + cpu: 1 + memory: 1Gi + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 # Ray dashboard + name: dashboard + - containerPort: 10001 + name: client + - containerPort: 8000 + name: serve + workerGroupSpecs: + # the pod replicas in this group typed worker + - replicas: 2 + minReplicas: 1 + maxReplicas: 5 + # logical group name, for this called small-group, also can be functional + groupName: worker + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. + rayStartParams: {} + #pod template + template: + spec: + containers: + - name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc' + image: rayproject/ray:2.7.0 + lifecycle: + preStop: + exec: + command: ["/bin/sh","-c","ray stop"] + resources: + limits: + cpu: 1 + memory: 2Gi + requests: + cpu: 1 + memory: 2Gi +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: query-config +data: + query.py: |- + import requests + + url = "http://rayservice-ha-serve-svc:8000/fruit/" + data = '["PEAR", 12]' + + req_index = 0 + num_fail = 0 + + while True: + print(f"req_index : {req_index}, num_fail: {num_fail}") + try: + response = requests.post(url, data=data) + print(f"response: {response.text}") + except Exception as e: + print(e) + num_fail += 1 + req_index += 1 +--- +apiVersion: v1 +kind: Pod +metadata: + name: ray-pod +spec: + containers: + - name: ray-container + image: rayproject/ray:2.7.0 + command: ["/bin/bash", "-c", "--"] + args: ["while true; do sleep 30; done;"] + volumeMounts: + - mountPath: /home/ray/samples + name: query-configmap + volumes: + - name: query-configmap + configMap: + name: query-config + items: + - key: query.py + path: query.py