From 601ab156a33bf73fe9571867a6ffb9470006e374 Mon Sep 17 00:00:00 2001 From: Andrew Aikawa Date: Wed, 31 Jul 2024 00:42:53 +0000 Subject: [PATCH 1/6] enumerate worker pods, bind pod_name to headless service --- sky/provision/kubernetes/instance.py | 12 ++++++------ sky/templates/kubernetes-ray.yml.j2 | 17 ++++++++++++++++- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/sky/provision/kubernetes/instance.py b/sky/provision/kubernetes/instance.py index a5996abe028..719aef31b7c 100644 --- a/sky/provision/kubernetes/instance.py +++ b/sky/provision/kubernetes/instance.py @@ -2,7 +2,6 @@ import copy import time from typing import Any, Dict, List, Optional -import uuid from sky import exceptions from sky import sky_logging @@ -482,7 +481,7 @@ def _create_pods(region: str, cluster_name_on_cloud: str, created_pods = {} logger.debug(f'run_instances: calling create_namespaced_pod ' f'(count={to_start_count}).') - for _ in range(to_start_count): + for pod_id in range(config.count): if head_pod_name is None: pod_spec['metadata']['labels'].update(constants.HEAD_NODE_TAGS) head_selector = head_service_selector(cluster_name_on_cloud) @@ -490,9 +489,11 @@ def _create_pods(region: str, cluster_name_on_cloud: str, pod_spec['metadata']['name'] = f'{cluster_name_on_cloud}-head' else: pod_spec['metadata']['labels'].update(constants.WORKER_NODE_TAGS) - pod_uuid = str(uuid.uuid4())[:4] - pod_name = f'{cluster_name_on_cloud}-{pod_uuid}' - pod_spec['metadata']['name'] = f'{pod_name}-worker' + pod_name = f'{cluster_name_on_cloud}-worker{pod_id}' + if pod_name in running_pods: + continue + pod_spec['metadata']['name'] = pod_name + pod_spec['metadata']['labels']['hostname'] = pod_name # For multi-node support, we put a soft-constraint to schedule # worker pods on different nodes than the head pod. # This is not set as a hard constraint because if different nodes @@ -519,7 +520,6 @@ def _create_pods(region: str, cluster_name_on_cloud: str, }] } } - pod = kubernetes.core_api().create_namespaced_pod(namespace, pod_spec) created_pods[pod.metadata.name] = pod if head_pod_name is None: diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index bd4bafd43d5..604faeefb25 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -242,6 +242,21 @@ provider: protocol: TCP port: 8265 targetPort: 8265 + # Service maps to rest of the worker nodes + {% for worker_id in range(1, num_nodes) %} + - apiVersion: v1 + kind: Service + metadata: + labels: + parent: skypilot + skypilot-cluster: {{cluster_name_on_cloud}} + skypilot-user: {{ user }} + name: {{cluster_name_on_cloud}}-worker{{ worker_id }} + spec: + selector: + hostname: {{cluster_name_on_cloud}}-worker{{ worker_id }} + clusterIP: None + {% endfor %} # Specify the pod type for the ray head node (as configured below). head_node_type: ray_head_default @@ -254,7 +269,7 @@ available_node_types: metadata: # name will be filled in the provisioner # head node name will be {{cluster_name_on_cloud}}-head, which will match the head node service selector above if a head node - # service is required. + # service is required. Remaining nodes are named {{cluster_name_on_cloud}}-worker{{ node_id }} labels: parent: skypilot # component will be set for the head node pod to be the same as the head node service selector above if a From 92924bc0a09e5a0fd97c8e5877ee762c3b010455 Mon Sep 17 00:00:00 2001 From: Andrew Aikawa Date: Wed, 31 Jul 2024 21:07:13 +0000 Subject: [PATCH 2/6] patch worker numbering --- sky/provision/kubernetes/instance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/provision/kubernetes/instance.py b/sky/provision/kubernetes/instance.py index 719aef31b7c..8cc673d236f 100644 --- a/sky/provision/kubernetes/instance.py +++ b/sky/provision/kubernetes/instance.py @@ -490,7 +490,7 @@ def _create_pods(region: str, cluster_name_on_cloud: str, else: pod_spec['metadata']['labels'].update(constants.WORKER_NODE_TAGS) pod_name = f'{cluster_name_on_cloud}-worker{pod_id}' - if pod_name in running_pods: + if pod_id == 0 or pod_name in running_pods: continue pod_spec['metadata']['name'] = pod_name pod_spec['metadata']['labels']['hostname'] = pod_name From 5c7699b203309c130aebb254d06d5a1b4c9c44c2 Mon Sep 17 00:00:00 2001 From: Andrew Date: Thu, 1 Aug 2024 01:57:54 -0700 Subject: [PATCH 3/6] allow any traffic between sky pods --- sky/provision/kubernetes/instance.py | 2 +- sky/templates/kubernetes-ray.yml.j2 | 28 ++++++++++------------------ 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/sky/provision/kubernetes/instance.py b/sky/provision/kubernetes/instance.py index 8cc673d236f..07e712cf4b6 100644 --- a/sky/provision/kubernetes/instance.py +++ b/sky/provision/kubernetes/instance.py @@ -493,7 +493,7 @@ def _create_pods(region: str, cluster_name_on_cloud: str, if pod_id == 0 or pod_name in running_pods: continue pod_spec['metadata']['name'] = pod_name - pod_spec['metadata']['labels']['hostname'] = pod_name + pod_spec['metadata']['labels']['component'] = pod_name # For multi-node support, we put a soft-constraint to schedule # worker pods on different nodes than the head pod. # This is not set as a hard constraint because if different nodes diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 604faeefb25..251a8597556 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -221,27 +221,19 @@ provider: - apiVersion: v1 kind: Service metadata: - labels: - parent: skypilot - skypilot-cluster: {{cluster_name_on_cloud}} - skypilot-user: {{ user }} - # NOTE: If you're running multiple Ray clusters with services - # on one Kubernetes cluster, they must have unique service - # names. - name: {{cluster_name_on_cloud}}-head + labels: + parent: skypilot + skypilot-cluster: {{cluster_name_on_cloud}} + skypilot-user: {{ user }} + # NOTE: If you're running multiple Ray clusters with services + # on one Kubernetes cluster, they must have unique service + # names. + name: {{cluster_name_on_cloud}}-head spec: # This selector must match the head node pod's selector below. selector: component: {{cluster_name_on_cloud}}-head - ports: - - name: client - protocol: TCP - port: 10001 - targetPort: 10001 - - name: dashboard - protocol: TCP - port: 8265 - targetPort: 8265 + clusterIP: None # Service maps to rest of the worker nodes {% for worker_id in range(1, num_nodes) %} - apiVersion: v1 @@ -254,7 +246,7 @@ provider: name: {{cluster_name_on_cloud}}-worker{{ worker_id }} spec: selector: - hostname: {{cluster_name_on_cloud}}-worker{{ worker_id }} + component: {{cluster_name_on_cloud}}-worker{{ worker_id }} clusterIP: None {% endfor %} From 278ba184e68c39c04f4542dc171cc974d1c3ccbc Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 9 Aug 2024 01:43:13 -0700 Subject: [PATCH 4/6] add test for ssh vs hostname --- tests/test_smoke.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 5752a5fa067..8e0d6183943 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -3140,6 +3140,23 @@ def test_kubernetes_custom_image(image_id): run_one_test(test) +@pytest.mark.kubernetes +def test_kubernetes_ssh_hostname(): + name = _get_cluster_name() + test = Test( + 'test-kubernetes-ssh-hostname', + [ + f'sky launch -c {name} -y --num-nodes 3', + f'ssh {name} -t "hostname" | grep head', + f'ssh {name}-worker1 -t "hostname" | grep worker1', + f'ssh {name}-worker2 -t "hostname" | grep worker2', + f'sky down -y {name}' + ] * 10, + f'sky down -y {name}', + timeout=10 * 60, + ) + run_one_test(test) + @pytest.mark.azure def test_azure_start_stop_two_nodes(): name = _get_cluster_name() From 5e382b94008417b57d46c20160212e1f4d9c8c9a Mon Sep 17 00:00:00 2001 From: Andrew Aikawa Date: Mon, 12 Aug 2024 22:46:26 +0000 Subject: [PATCH 5/6] fix ip-worker mapping for k8s ssh --- sky/backends/cloud_vm_ray_backend.py | 10 +++++++--- tests/test_smoke.py | 13 ++++++++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index ed157736007..be9ed23a740 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2323,9 +2323,13 @@ def is_provided_ips_valid( zip(cluster_internal_ips, cluster_feasible_ips)) # Ensure head node is the first element, then sort based on the - # external IPs for stableness - stable_internal_external_ips = [internal_external_ips[0]] + sorted( - internal_external_ips[1:], key=lambda x: x[1]) + # external IPs for stableness. Skip for k8s nodes since pods + # worker ids are already mapped. + if cluster_info is not None and cluster_info.provider_name == 'kubernetes': + stable_internal_external_ips = internal_external_ips + else: + stable_internal_external_ips = [internal_external_ips[0]] + sorted( + internal_external_ips[1:], key=lambda x: x[1]) self.stable_internal_external_ips = stable_internal_external_ips @functools.lru_cache() diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 8e0d6183943..0d8d24ca9c3 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -3146,17 +3146,24 @@ def test_kubernetes_ssh_hostname(): test = Test( 'test-kubernetes-ssh-hostname', [ - f'sky launch -c {name} -y --num-nodes 3', + f'sky launch -c {name} -y --num-nodes 10 --cpus 1+', f'ssh {name} -t "hostname" | grep head', f'ssh {name}-worker1 -t "hostname" | grep worker1', f'ssh {name}-worker2 -t "hostname" | grep worker2', - f'sky down -y {name}' - ] * 10, + f'ssh {name}-worker3 -t "hostname" | grep worker3', + f'ssh {name}-worker4 -t "hostname" | grep worker4', + f'ssh {name}-worker5 -t "hostname" | grep worker5', + f'ssh {name}-worker6 -t "hostname" | grep worker6', + f'ssh {name}-worker7 -t "hostname" | grep worker7', + f'ssh {name}-worker8 -t "hostname" | grep worker8', + f'ssh {name}-worker9 -t "hostname" | grep worker9', + ], f'sky down -y {name}', timeout=10 * 60, ) run_one_test(test) + @pytest.mark.azure def test_azure_start_stop_two_nodes(): name = _get_cluster_name() From ff02494c0b3489261312676373b1f4bc820edf09 Mon Sep 17 00:00:00 2001 From: Andrew Aikawa Date: Tue, 13 Aug 2024 02:59:33 +0000 Subject: [PATCH 6/6] lint --- sky/backends/cloud_vm_ray_backend.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index be9ed23a740..3545b1a62dd 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2325,7 +2325,8 @@ def is_provided_ips_valid( # Ensure head node is the first element, then sort based on the # external IPs for stableness. Skip for k8s nodes since pods # worker ids are already mapped. - if cluster_info is not None and cluster_info.provider_name == 'kubernetes': + if (cluster_info is not None and + cluster_info.provider_name == 'kubernetes'): stable_internal_external_ips = internal_external_ips else: stable_internal_external_ips = [internal_external_ips[0]] + sorted(